diff --git a/Changelog b/Changelog index 1beca1b..1c47908 100644 --- a/Changelog +++ b/Changelog @@ -1,4 +1,5 @@ -next - fix rt.cpan.org#113671: ignore utf BOM, if any. +2.61 - fix rt.cpan.org#113671: ignore utf BOM, if any and turn on + UTF8 support if not yet enabled. 2.60 - fix rt.cpan.org#107929: added missing test config. diff --git a/General.pm b/General.pm index 3f1d392..1a5975d 100644 --- a/General.pm +++ b/General.pm @@ -38,6 +38,8 @@ use vars qw(@ISA @EXPORT_OK); use base qw(Exporter); @EXPORT_OK = qw(ParseConfig SaveConfig SaveConfigString); +use constant _UTF8_BOM => "\x{ef}\x{bb}\x{bf}"; + sub new { # # create new Config::General object @@ -407,7 +409,7 @@ sub files { sub _open { # - # open the config file, or expand a directory or glob + # open the config file, or expand a directory or glob or include # my($this, $basefile, $basepath) = @_; my $cont; @@ -433,11 +435,11 @@ sub _open { # applied patch by AlexK fixing rt.cpan.org#41030 if ( !@include && defined $this->{ConfigPath} ) { foreach my $dir (@{$this->{ConfigPath}}) { - my ($volume, $path, undef) = splitpath($basefile); - if ( -d catfile( $dir, $path ) ) { - push @include, grep { -f $_ } bsd_glob(catfile($dir, $basefile), GLOB_BRACE | GLOB_QUOTE); - last; - } + my ($volume, $path, undef) = splitpath($basefile); + if ( -d catfile( $dir, $path ) ) { + push @include, grep { -f $_ } bsd_glob(catfile($dir, $basefile), GLOB_BRACE | GLOB_QUOTE); + last; + } } } @@ -449,7 +451,7 @@ sub _open { # include foo/* shouldn't fail if there isn't anything matching) # rt.cpan.org#79869: local $this->{IncludeGlob}; for (@include) { - $this->_open($_); + $this->_open($_); } return; } @@ -460,11 +462,11 @@ sub _open { if (defined $this->{ConfigPath}) { # try to find the file within ConfigPath foreach my $dir (@{$this->{ConfigPath}}) { - if( -e catfile($dir, $basefile) ) { - $configfile = catfile($dir, $basefile); - $found = 1; - last; # found it - } + if( -e catfile($dir, $basefile) ) { + $configfile = catfile($dir, $basefile); + $found = 1; + last; # found it + } } } if (!$found) { @@ -490,16 +492,9 @@ sub _open { my $file = catfile($configfile, $_); if (! exists $this->{files}->{$file} or $this->{IncludeAgain} ) { # support re-read if used urged us to do so, otherwise ignore the file - if ($this->{UTF8}) { - $fh = IO::File->new; - open( $fh, "<:utf8", $file) - or croak "Config::General: Could not open $file in UTF8 mode!($!)\n"; - } - else { - $fh = IO::File->new( $file, 'r') or croak "Config::General: Could not open $file!($!)\n"; - } - $this->{files}->{"$file"} = 1; - $this->_read($fh); + $fh = $this->_openfile_for_read($file); + $this->{files}->{"$file"} = 1; + $this->_read($fh); } else { warn "File $file already loaded. Use -IncludeAgain to load it again.\n"; @@ -516,16 +511,7 @@ sub _open { return; } else { - if ($this->{UTF8}) { - $fh = IO::File->new; - open( $fh, "<:utf8", $configfile) - or croak "Config::General: Could not open $configfile in UTF8 mode!($!)\n"; - } - else { - $fh = IO::File->new( "$configfile", 'r') - or croak "Config::General: Could not open $configfile!($!)\n"; - } - + $fh = $this->_openfile_for_read($configfile); $this->{files}->{$configfile} = 1; my ($volume, $path, undef) = splitpath($configfile); @@ -538,6 +524,30 @@ sub _open { } +sub _openfile_for_read { + # + # actually open a file, turn on utf8 mode if requested by bom + # + my ($this, $file) = @_; + + my $fh = IO::File->new( $file, 'r') + or croak "Config::General: Could not open $file!($!)\n"; + + # attempt to read an initial utf8 byte-order mark (BOM) + my $n_read = sysread $fh, my $read_BOM, length(_UTF8_BOM); + my $has_BOM = $n_read == length(_UTF8_BOM) && $read_BOM eq _UTF8_BOM; + + # set utf8 perlio layer if BOM was found or if option -UTF8 is turned on + binmode $fh, ":utf8" if $this->{UTF8} || $has_BOM; + + # rewind to beginning of file if we read chars that were not the BOM + sysseek $fh, 0, 0 if $n_read && !$has_BOM; + + return $fh; +} + + + sub _read { # # store the config contents in @content @@ -713,7 +723,7 @@ sub _read { my $incl_file; my $path = ''; if ( $this->{IncludeRelative} and defined $this->{CurrentConfigFilePath}) { - $path = $this->{CurrentConfigFilePath}; + $path = $this->{CurrentConfigFilePath}; } elsif (defined $this->{ConfigPath}) { # fetch pathname of base config file, assuming the 1st one is the path of it @@ -2113,82 +2123,86 @@ The method B returns a hash of all values. You can define a B of options. A B looks much like a block in the wellknown Apache config format. It starts with EBE and ends -with E/BE. An example: +with E/BE. - - host = muli - user = moare - dbname = modb - dbpass = D4r_9Iu - +A block start and end cannot be on the same line. + +An example: + + +host = muli +user = moare +dbname = modb +dbpass = D4r_9Iu + Blocks can also be nested. Here is a more complicated example: - user = hans - server = mc200 - db = maxis - passwd = D3rf$ - - user = tom - db = unknown - host = mila - - index int(100000) - name char(100) - prename char(100) - city char(100) - status int(10) - allowed moses - allowed ingram - allowed joice - - +user = hans +server = mc200 +db = maxis +passwd = D3rf$ + +user = tom +db = unknown +host = mila + +index int(100000) +name char(100) +prename char(100) +city char(100) +status int(10) +allowed moses +allowed ingram +allowed joice + + The hash which the method B returns look like that: - print Data::Dumper(\%hash); - $VAR1 = { - 'passwd' => 'D3rf$', - 'jonas' => { - 'tablestructure' => { - 'prename' => 'char(100)', - 'index' => 'int(100000)', - 'city' => 'char(100)', - 'name' => 'char(100)', - 'status' => 'int(10)', - 'allowed' => [ - 'moses', - 'ingram', - 'joice', - ] - }, - 'host' => 'mila', - 'db' => 'unknown', - 'user' => 'tom' - }, - 'db' => 'maxis', - 'server' => 'mc200', - 'user' => 'hans' - }; +print Data::Dumper(\%hash); +$VAR1 = { +'passwd' => 'D3rf$', +'jonas' => { +'tablestructure' => { +'prename' => 'char(100)', +'index' => 'int(100000)', +'city' => 'char(100)', +'name' => 'char(100)', +'status' => 'int(10)', +'allowed' => [ +'moses', +'ingram', +'joice', +] +}, +'host' => 'mila', +'db' => 'unknown', +'user' => 'tom' +}, +'db' => 'maxis', +'server' => 'mc200', +'user' => 'hans' +}; If you have turned on B<-LowerCaseNames> (see new()) then blocks as in the following example: - - - Owner root - - + + +Owner root + + would produce the following hash structure: - $VAR1 = { - 'dir' => { - 'attributes' => { - 'owner => "root", - } - } - }; +$VAR1 = { +'dir' => { +'attributes' => { +'owner => "root", +} +} +}; As you can see, the keys inside the config hash are normalized. @@ -2210,29 +2224,29 @@ create a hashref with the left part of the named block as the key containing one or more hashrefs with the right part of the block as key containing everything inside the block(which may again be nested!). As examples says more than words: - # given the following sample - - Limit Deny - Options ExecCgi Index - - - Limit DenyAll - Options None - +# given the following sample + +Limit Deny +Options ExecCgi Index + + +Limit DenyAll +Options None + - # you will get: - $VAR1 = { - 'Directory' => { - '/usr/frik' => { - 'Options' => 'None', - 'Limit' => 'DenyAll' - }, - '/usr/frisco' => { - 'Options' => 'ExecCgi Index', - 'Limit' => 'Deny' - } - } - }; +# you will get: +$VAR1 = { +'Directory' => { +'/usr/frik' => { +'Options' => 'None', +'Limit' => 'DenyAll' +}, +'/usr/frisco' => { +'Options' => 'ExecCgi Index', +'Limit' => 'Deny' +} +} +}; You cannot have more than one named block with the same name because it will be stored in a hashref and therefore be overwritten if a block occurs once more. @@ -2248,28 +2262,28 @@ Sometimes you may need blocknames which have whitespace in their names. With named blocks this is no problem, as the module only looks for the first whitespace: - - + + would be parsed to: - $VAR1 = { - 'person' => { - 'hugo gera' => { - }, - } - }; +$VAR1 = { +'person' => { +'hugo gera' => { +}, +} +}; The problem occurs, if you want to have a simple block containing whitespace: - - + + This would be parsed as a named block, which is not what you wanted. In this very case you may use quotation marks to indicate that it is not a named block: - <"hugo gera"> - +<"hugo gera"> + The save() method of the module inserts automatically quotation marks in such cases. @@ -2283,12 +2297,12 @@ explicit empty blocks. Normally you would write this in your config to define an empty block: - - + + To save writing you can also write: - + which is the very same as above. This works for normal blocks and for named blocks. @@ -2300,45 +2314,45 @@ for named blocks. You may have more than one line of the same option with different values. Example: - log log1 - log log2 - log log2 +log log1 +log log2 +log log2 You will get a scalar if the option occurred only once or an array if it occurred more than once. If you expect multiple identical options, then you may need to check if an option occurred more than once: - $allowed = $hash{jonas}->{tablestructure}->{allowed}; - if(ref($allowed) eq "ARRAY") { - @ALLOWED = @{$allowed}; - else { - @ALLOWED = ($allowed); - } +$allowed = $hash{jonas}->{tablestructure}->{allowed}; +if(ref($allowed) eq "ARRAY") { +@ALLOWED = @{$allowed}; +else { +@ALLOWED = ($allowed); +} The same applies to blocks and named blocks too (they are described in more detail below). For example, if you have the following config: - - user max - - - user hannes - + +user max + + +user hannes + then you would end up with a data structure like this: - $VAR1 = { - 'dir' => { - 'blah' => [ - { - 'user' => 'max' - }, - { - 'user' => 'hannes' - } - ] - } - }; +$VAR1 = { +'dir' => { +'blah' => [ +{ +'user' => 'max' +}, +{ +'user' => 'hannes' +} +] +} +}; As you can see, the two identical blocks are stored in a hash which contains an array(-reference) of hashes. @@ -2349,16 +2363,16 @@ both identical blocks, then you need to turn the B parameter B<-MergeDupl on (see above). The parsed structure of the example above would then look like this: - $VAR1 = { - 'dir' => { - 'blah' => { - 'user' => [ - 'max', - 'hannes' - ] - } - } - }; +$VAR1 = { +'dir' => { +'blah' => { +'user' => [ +'max', +'hannes' +] +} +} +}; As you can see, there is only one hash "dir->{blah}" containing multiple "user" entries. As you can also see, turning on B<-MergeDuplicateBlocks> @@ -2376,7 +2390,7 @@ You may also force a single config line to get parsed into an array by turning on the option B<-ForceArray> and by surrounding the value of the config entry by []. Example: - hostlist = [ foo.bar ] +hostlist = [ foo.bar ] Will be a singlevalue array entry if the option is turned on. If you want it to remain to be an array you have to turn on B<-ForceArray> during save too. @@ -2390,11 +2404,11 @@ of the line. The Config::General module will concatenate those lines to one sing Example: command = cat /var/log/secure/tripwire | \ - mail C<-s> "report from tripwire" \ - honey@myotherhost.nl +mail C<-s> "report from tripwire" \ +honey@myotherhost.nl command will become: - "cat /var/log/secure/tripwire | mail C<-s> 'report from twire' honey@myotherhost.nl" +"cat /var/log/secure/tripwire | mail C<-s> 'report from twire' honey@myotherhost.nl" =head1 HERE DOCUMENTS @@ -2405,12 +2419,12 @@ identifier must follow a "<<". Example: - message <. @@ -2421,19 +2435,19 @@ amount of spaces from every line inside the here-document. Example: - message <> +<> If you turned on B<-UseApacheInclude> (see B), then you can also use the following statement to include an external file: - include externalconfig.rc +include externalconfig.rc This file will be inserted at the position where it was found as if the contents of this file were directly at this position. @@ -2464,22 +2478,22 @@ working directory. Under some circumstances it maybe possible to open included files from the directory, where the configfile resides. You need to turn on the option B<-IncludeRelative> (see B) if you want that. An example: - my $conf = Config::General( - -ConfigFile => "/etc/crypt.d/server.cfg" - -IncludeRelative => 1 - ); +my $conf = Config::General( +-ConfigFile => "/etc/crypt.d/server.cfg" +-IncludeRelative => 1 +); - /etc/crypt.d/server.cfg: - <> +/etc/crypt.d/server.cfg: +<> In this example Config::General will try to include I from I: - /etc/crypt.d/acl.cfg +/etc/crypt.d/acl.cfg The default behavior (if B<-IncludeRelative> is B set!) will be to open just I, wherever it is, i.e. if you did a chdir("/usr/local/etc"), then Config::General will include: - /usr/local/etc/acl.cfg +/usr/local/etc/acl.cfg Include statements can be case insensitive (added in version 1.25). @@ -2492,21 +2506,21 @@ so make sure, you're not including the same file from within itself! Example: - # main.cfg - - class=Some::Class - - include printers.cfg - - # ... - - - class=Another::Class - - include printers.cfg - - # ... - +# main.cfg + +class=Some::Class + +include printers.cfg + +# ... + + +class=Another::Class + +include printers.cfg + +# ... + Now C will be include in both the C and C objects. @@ -2522,18 +2536,18 @@ tab stops in front of the #. A comment can also occur after a config statement. Example: - username = max # this is the comment +username = max # this is the comment If you want to comment out a large block you can use C-style comments. A B signals the begin of a comment block and the B<*/> signals the end of the comment block. Example: - user = max # valid option - db = tothemax - /* - user = andors - db = toand - */ +user = max # valid option +db = tothemax +/* +user = andors +db = toand +*/ In this example the second options of user and db will be ignored. Please beware of the fact, if the Module finds a B string which is the start of a comment block, but no matching @@ -2542,7 +2556,7 @@ end block, it will ignore the whole rest of the config file! B If you require the B<#> character (number sign) to remain in the option value, then you can use a backslash in front of it, to escape it. Example: - bgcolor = \#ffffcc +bgcolor = \#ffffcc In this example the value of $config{bgcolor} will be "#ffffcc", Config::General will not treat the number sign as the begin of a comment because of the leading backslash. @@ -2558,25 +2572,25 @@ and parsing. The general aproach works like this: - sub ck { - my($file, $base) = @_; - print "_open() tries $file ... "; - if($file =~ /blah/) { - print "ignored\n"; - return (0); - } - else { - print "allowed\n"; - return (1, @_); - } - } +sub ck { +my($file, $base) = @_; +print "_open() tries $file ... "; +if($file =~ /blah/) { +print "ignored\n"; +return (0); +} +else { +print "allowed\n"; +return (1, @_); +} +} - my %c = ParseConfig( - -IncludeGlob => 1, - -UseApacheInclude => 1, - -ConfigFile => shift, - -Plug => { pre_open => *ck } - ); +my %c = ParseConfig( +-IncludeGlob => 1, +-UseApacheInclude => 1, +-ConfigFile => shift, +-Plug => { pre_open => *ck } +); Output: @@ -2746,7 +2760,7 @@ I recommend you to read the following documents, which are supplied with Perl: =head1 LICENSE AND COPYRIGHT -Copyright (c) 2000-2015 Thomas Linden +Copyright (c) 2000-2016 Thomas Linden This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. diff --git a/MANIFEST b/MANIFEST index 280ff95..34d3655 100644 --- a/MANIFEST +++ b/MANIFEST @@ -51,3 +51,5 @@ t/test.rc t/Tie/IxHash.pm t/Tie/README t/cfg.51 +t/utf8_bom/bar.cfg +t/utf8_bom/foo.cfg diff --git a/t/run.t b/t/run.t index 854b88b..a969fb9 100644 --- a/t/run.t +++ b/t/run.t @@ -8,7 +8,7 @@ use Data::Dumper; -use Test::More tests => 73; +use Test::More tests => 75; #use Test::More qw(no_plan); # ahem, we deliver the test code with a local copy of @@ -18,7 +18,7 @@ use lib qw(t); use Tie::IxHash; my @WARNINGS_FOUND; BEGIN { - $SIG{__WARN__} = sub { diag( "WARN: ", join( '', @_ ) ); push @WARNINGS_FOUND, @_ }; + $SIG{__WARN__} = sub { diag( "WARN: ", join( '', @_ ) ); push @WARNINGS_FOUND, @_ }; } ### 1 @@ -761,3 +761,16 @@ eval { $cfg56->save_file("t/56.out", { "new\nline" => 9, "brack 8 }); }; ok($@, "catch special chars in keys"); + + +# UTF8[BOM] tests +my $cfg57 = "t/utf8_bom/foo.cfg"; +my $expected57 = {foo => {"\x{e9}" => "\x{e8}", bar => {"\x{f4}" => "\x{ee}"}}}; + +for my $bool (0, 1) { + my $conf = Config::General->new(-ConfigFile => $cfg57, + -IncludeRelative => 1, + -UTF8 => $bool); + my %hash = $conf->getall; + is_deeply \%hash, $expected57, "-UTF8 => $bool"; +} diff --git a/t/utf8_bom/bar.cfg b/t/utf8_bom/bar.cfg new file mode 100644 index 0000000..8e1fb6d --- /dev/null +++ b/t/utf8_bom/bar.cfg @@ -0,0 +1,3 @@ + + ô = î + diff --git a/t/utf8_bom/foo.cfg b/t/utf8_bom/foo.cfg new file mode 100644 index 0000000..dfda03f --- /dev/null +++ b/t/utf8_bom/foo.cfg @@ -0,0 +1,4 @@ + + é = è + <> +