fixed rt.cpan.org#113671: recognize BOM at start of a utf8 file

fixed rt.cpan.org#112857: Same Line Closing Block Breaks Parser



git-svn-id: http://dev.catalyst.perl.org/repos/Config-General/trunk@110 be1acefe-a474-0410-9a34-9b3221f2030f
This commit is contained in:
Thomas von Dein
2016-04-18 13:09:45 +00:00
parent 34397e9b04
commit 7a48e048cf
6 changed files with 281 additions and 244 deletions

View File

@@ -38,6 +38,8 @@ use vars qw(@ISA @EXPORT_OK);
use base qw(Exporter);
@EXPORT_OK = qw(ParseConfig SaveConfig SaveConfigString);
use constant _UTF8_BOM => "\x{ef}\x{bb}\x{bf}";
sub new {
#
# create new Config::General object
@@ -407,7 +409,7 @@ sub files {
sub _open {
#
# open the config file, or expand a directory or glob
# open the config file, or expand a directory or glob or include
#
my($this, $basefile, $basepath) = @_;
my $cont;
@@ -433,11 +435,11 @@ sub _open {
# applied patch by AlexK fixing rt.cpan.org#41030
if ( !@include && defined $this->{ConfigPath} ) {
foreach my $dir (@{$this->{ConfigPath}}) {
my ($volume, $path, undef) = splitpath($basefile);
if ( -d catfile( $dir, $path ) ) {
push @include, grep { -f $_ } bsd_glob(catfile($dir, $basefile), GLOB_BRACE | GLOB_QUOTE);
last;
}
my ($volume, $path, undef) = splitpath($basefile);
if ( -d catfile( $dir, $path ) ) {
push @include, grep { -f $_ } bsd_glob(catfile($dir, $basefile), GLOB_BRACE | GLOB_QUOTE);
last;
}
}
}
@@ -449,7 +451,7 @@ sub _open {
# include foo/* shouldn't fail if there isn't anything matching)
# rt.cpan.org#79869: local $this->{IncludeGlob};
for (@include) {
$this->_open($_);
$this->_open($_);
}
return;
}
@@ -460,11 +462,11 @@ sub _open {
if (defined $this->{ConfigPath}) {
# try to find the file within ConfigPath
foreach my $dir (@{$this->{ConfigPath}}) {
if( -e catfile($dir, $basefile) ) {
$configfile = catfile($dir, $basefile);
$found = 1;
last; # found it
}
if( -e catfile($dir, $basefile) ) {
$configfile = catfile($dir, $basefile);
$found = 1;
last; # found it
}
}
}
if (!$found) {
@@ -490,16 +492,9 @@ sub _open {
my $file = catfile($configfile, $_);
if (! exists $this->{files}->{$file} or $this->{IncludeAgain} ) {
# support re-read if used urged us to do so, otherwise ignore the file
if ($this->{UTF8}) {
$fh = IO::File->new;
open( $fh, "<:utf8", $file)
or croak "Config::General: Could not open $file in UTF8 mode!($!)\n";
}
else {
$fh = IO::File->new( $file, 'r') or croak "Config::General: Could not open $file!($!)\n";
}
$this->{files}->{"$file"} = 1;
$this->_read($fh);
$fh = $this->_openfile_for_read($file);
$this->{files}->{"$file"} = 1;
$this->_read($fh);
}
else {
warn "File $file already loaded. Use -IncludeAgain to load it again.\n";
@@ -516,16 +511,7 @@ sub _open {
return;
}
else {
if ($this->{UTF8}) {
$fh = IO::File->new;
open( $fh, "<:utf8", $configfile)
or croak "Config::General: Could not open $configfile in UTF8 mode!($!)\n";
}
else {
$fh = IO::File->new( "$configfile", 'r')
or croak "Config::General: Could not open $configfile!($!)\n";
}
$fh = $this->_openfile_for_read($configfile);
$this->{files}->{$configfile} = 1;
my ($volume, $path, undef) = splitpath($configfile);
@@ -538,6 +524,30 @@ sub _open {
}
sub _openfile_for_read {
#
# actually open a file, turn on utf8 mode if requested by bom
#
my ($this, $file) = @_;
my $fh = IO::File->new( $file, 'r')
or croak "Config::General: Could not open $file!($!)\n";
# attempt to read an initial utf8 byte-order mark (BOM)
my $n_read = sysread $fh, my $read_BOM, length(_UTF8_BOM);
my $has_BOM = $n_read == length(_UTF8_BOM) && $read_BOM eq _UTF8_BOM;
# set utf8 perlio layer if BOM was found or if option -UTF8 is turned on
binmode $fh, ":utf8" if $this->{UTF8} || $has_BOM;
# rewind to beginning of file if we read chars that were not the BOM
sysseek $fh, 0, 0 if $n_read && !$has_BOM;
return $fh;
}
sub _read {
#
# store the config contents in @content
@@ -713,7 +723,7 @@ sub _read {
my $incl_file;
my $path = '';
if ( $this->{IncludeRelative} and defined $this->{CurrentConfigFilePath}) {
$path = $this->{CurrentConfigFilePath};
$path = $this->{CurrentConfigFilePath};
}
elsif (defined $this->{ConfigPath}) {
# fetch pathname of base config file, assuming the 1st one is the path of it
@@ -2113,82 +2123,86 @@ The method B<getall> returns a hash of all values.
You can define a B<block> of options. A B<block> looks much like a block
in the wellknown Apache config format. It starts with E<lt>B<blockname>E<gt> and ends
with E<lt>/B<blockname>E<gt>. An example:
with E<lt>/B<blockname>E<gt>.
<database>
host = muli
user = moare
dbname = modb
dbpass = D4r_9Iu
</database>
A block start and end cannot be on the same line.
An example:
<database>
host = muli
user = moare
dbname = modb
dbpass = D4r_9Iu
</database>
Blocks can also be nested. Here is a more complicated example:
user = hans
server = mc200
db = maxis
passwd = D3rf$
<jonas>
user = tom
db = unknown
host = mila
<tablestructure>
index int(100000)
name char(100)
prename char(100)
city char(100)
status int(10)
allowed moses
allowed ingram
allowed joice
</tablestructure>
</jonas>
user = hans
server = mc200
db = maxis
passwd = D3rf$
<jonas>
user = tom
db = unknown
host = mila
<tablestructure>
index int(100000)
name char(100)
prename char(100)
city char(100)
status int(10)
allowed moses
allowed ingram
allowed joice
</tablestructure>
</jonas>
The hash which the method B<getall> returns look like that:
print Data::Dumper(\%hash);
$VAR1 = {
'passwd' => 'D3rf$',
'jonas' => {
'tablestructure' => {
'prename' => 'char(100)',
'index' => 'int(100000)',
'city' => 'char(100)',
'name' => 'char(100)',
'status' => 'int(10)',
'allowed' => [
'moses',
'ingram',
'joice',
]
},
'host' => 'mila',
'db' => 'unknown',
'user' => 'tom'
},
'db' => 'maxis',
'server' => 'mc200',
'user' => 'hans'
};
print Data::Dumper(\%hash);
$VAR1 = {
'passwd' => 'D3rf$',
'jonas' => {
'tablestructure' => {
'prename' => 'char(100)',
'index' => 'int(100000)',
'city' => 'char(100)',
'name' => 'char(100)',
'status' => 'int(10)',
'allowed' => [
'moses',
'ingram',
'joice',
]
},
'host' => 'mila',
'db' => 'unknown',
'user' => 'tom'
},
'db' => 'maxis',
'server' => 'mc200',
'user' => 'hans'
};
If you have turned on B<-LowerCaseNames> (see new()) then blocks as in the
following example:
<Dir>
<AttriBUTES>
Owner root
</attributes>
</dir>
<Dir>
<AttriBUTES>
Owner root
</attributes>
</dir>
would produce the following hash structure:
$VAR1 = {
'dir' => {
'attributes' => {
'owner => "root",
}
}
};
$VAR1 = {
'dir' => {
'attributes' => {
'owner => "root",
}
}
};
As you can see, the keys inside the config hash are normalized.
@@ -2210,29 +2224,29 @@ create a hashref with the left part of the named block as the key containing
one or more hashrefs with the right part of the block as key containing everything
inside the block(which may again be nested!). As examples says more than words:
# given the following sample
<Directory /usr/frisco>
Limit Deny
Options ExecCgi Index
</Directory>
<Directory /usr/frik>
Limit DenyAll
Options None
</Directory>
# given the following sample
<Directory /usr/frisco>
Limit Deny
Options ExecCgi Index
</Directory>
<Directory /usr/frik>
Limit DenyAll
Options None
</Directory>
# you will get:
$VAR1 = {
'Directory' => {
'/usr/frik' => {
'Options' => 'None',
'Limit' => 'DenyAll'
},
'/usr/frisco' => {
'Options' => 'ExecCgi Index',
'Limit' => 'Deny'
}
}
};
# you will get:
$VAR1 = {
'Directory' => {
'/usr/frik' => {
'Options' => 'None',
'Limit' => 'DenyAll'
},
'/usr/frisco' => {
'Options' => 'ExecCgi Index',
'Limit' => 'Deny'
}
}
};
You cannot have more than one named block with the same name because it will
be stored in a hashref and therefore be overwritten if a block occurs once more.
@@ -2248,28 +2262,28 @@ Sometimes you may need blocknames which have whitespace in their names.
With named blocks this is no problem, as the module only looks for the
first whitespace:
<person hugo gera>
</person>
<person hugo gera>
</person>
would be parsed to:
$VAR1 = {
'person' => {
'hugo gera' => {
},
}
};
$VAR1 = {
'person' => {
'hugo gera' => {
},
}
};
The problem occurs, if you want to have a simple block containing whitespace:
<hugo gera>
</hugo gera>
<hugo gera>
</hugo gera>
This would be parsed as a named block, which is not what you wanted. In this
very case you may use quotation marks to indicate that it is not a named block:
<"hugo gera">
</"hugo gera">
<"hugo gera">
</"hugo gera">
The save() method of the module inserts automatically quotation marks in such
cases.
@@ -2283,12 +2297,12 @@ explicit empty blocks.
Normally you would write this in your config to define an empty
block:
<driver Apache>
</driver>
<driver Apache>
</driver>
To save writing you can also write:
<driver Apache/>
<driver Apache/>
which is the very same as above. This works for normal blocks and
for named blocks.
@@ -2300,45 +2314,45 @@ for named blocks.
You may have more than one line of the same option with different values.
Example:
log log1
log log2
log log2
log log1
log log2
log log2
You will get a scalar if the option occurred only once or an array if it occurred
more than once. If you expect multiple identical options, then you may need to
check if an option occurred more than once:
$allowed = $hash{jonas}->{tablestructure}->{allowed};
if(ref($allowed) eq "ARRAY") {
@ALLOWED = @{$allowed};
else {
@ALLOWED = ($allowed);
}
$allowed = $hash{jonas}->{tablestructure}->{allowed};
if(ref($allowed) eq "ARRAY") {
@ALLOWED = @{$allowed};
else {
@ALLOWED = ($allowed);
}
The same applies to blocks and named blocks too (they are described in more detail
below). For example, if you have the following config:
<dir blah>
user max
</dir>
<dir blah>
user hannes
</dir>
<dir blah>
user max
</dir>
<dir blah>
user hannes
</dir>
then you would end up with a data structure like this:
$VAR1 = {
'dir' => {
'blah' => [
{
'user' => 'max'
},
{
'user' => 'hannes'
}
]
}
};
$VAR1 = {
'dir' => {
'blah' => [
{
'user' => 'max'
},
{
'user' => 'hannes'
}
]
}
};
As you can see, the two identical blocks are stored in a hash which contains
an array(-reference) of hashes.
@@ -2349,16 +2363,16 @@ both identical blocks, then you need to turn the B<new()> parameter B<-MergeDupl
on (see above). The parsed structure of the example above would then look like
this:
$VAR1 = {
'dir' => {
'blah' => {
'user' => [
'max',
'hannes'
]
}
}
};
$VAR1 = {
'dir' => {
'blah' => {
'user' => [
'max',
'hannes'
]
}
}
};
As you can see, there is only one hash "dir->{blah}" containing multiple
"user" entries. As you can also see, turning on B<-MergeDuplicateBlocks>
@@ -2376,7 +2390,7 @@ You may also force a single config line to get parsed into an array by
turning on the option B<-ForceArray> and by surrounding the value of the
config entry by []. Example:
hostlist = [ foo.bar ]
hostlist = [ foo.bar ]
Will be a singlevalue array entry if the option is turned on. If you want
it to remain to be an array you have to turn on B<-ForceArray> during save too.
@@ -2390,11 +2404,11 @@ of the line. The Config::General module will concatenate those lines to one sing
Example:
command = cat /var/log/secure/tripwire | \
mail C<-s> "report from tripwire" \
honey@myotherhost.nl
mail C<-s> "report from tripwire" \
honey@myotherhost.nl
command will become:
"cat /var/log/secure/tripwire | mail C<-s> 'report from twire' honey@myotherhost.nl"
"cat /var/log/secure/tripwire | mail C<-s> 'report from twire' honey@myotherhost.nl"
=head1 HERE DOCUMENTS
@@ -2405,12 +2419,12 @@ identifier must follow a "<<".
Example:
message <<EOF
we want to
remove the
homedir of
root.
EOF
message <<EOF
we want to
remove the
homedir of
root.
EOF
Everything between the two "EOF" strings will be in the option I<message>.
@@ -2421,19 +2435,19 @@ amount of spaces from every line inside the here-document.
Example:
message <<EOF
we want to
remove the
homedir of
root.
EOF
message <<EOF
we want to
remove the
homedir of
root.
EOF
After parsing, message will become:
we want to
remove the
homedir of
root.
we want to
remove the
homedir of
root.
because there were the string " " in front of EOF, which were cut from every
line inside the here-document.
@@ -2445,12 +2459,12 @@ line inside the here-document.
You can include an external file at any position in your config file using the following statement
in your config file:
<<include externalconfig.rc>>
<<include externalconfig.rc>>
If you turned on B<-UseApacheInclude> (see B<new()>), then you can also use the following
statement to include an external file:
include externalconfig.rc
include externalconfig.rc
This file will be inserted at the position where it was found as if the contents of this file
were directly at this position.
@@ -2464,22 +2478,22 @@ working directory. Under some circumstances it maybe possible to
open included files from the directory, where the configfile resides. You need to turn on
the option B<-IncludeRelative> (see B<new()>) if you want that. An example:
my $conf = Config::General(
-ConfigFile => "/etc/crypt.d/server.cfg"
-IncludeRelative => 1
);
my $conf = Config::General(
-ConfigFile => "/etc/crypt.d/server.cfg"
-IncludeRelative => 1
);
/etc/crypt.d/server.cfg:
<<include acl.cfg>>
/etc/crypt.d/server.cfg:
<<include acl.cfg>>
In this example Config::General will try to include I<acl.cfg> from I</etc/crypt.d>:
/etc/crypt.d/acl.cfg
/etc/crypt.d/acl.cfg
The default behavior (if B<-IncludeRelative> is B<not> set!) will be to open just I<acl.cfg>,
wherever it is, i.e. if you did a chdir("/usr/local/etc"), then Config::General will include:
/usr/local/etc/acl.cfg
/usr/local/etc/acl.cfg
Include statements can be case insensitive (added in version 1.25).
@@ -2492,21 +2506,21 @@ so make sure, you're not including the same file from within itself!
Example:
# main.cfg
<object billy>
class=Some::Class
<printers>
include printers.cfg
</printers>
# ...
</object>
<object bob>
class=Another::Class
<printers>
include printers.cfg
</printers>
# ...
</object>
# main.cfg
<object billy>
class=Some::Class
<printers>
include printers.cfg
</printers>
# ...
</object>
<object bob>
class=Another::Class
<printers>
include printers.cfg
</printers>
# ...
</object>
Now C<printers.cfg> will be include in both the C<billy> and C<bob> objects.
@@ -2522,18 +2536,18 @@ tab stops in front of the #.
A comment can also occur after a config statement. Example:
username = max # this is the comment
username = max # this is the comment
If you want to comment out a large block you can use C-style comments. A B</*> signals
the begin of a comment block and the B<*/> signals the end of the comment block.
Example:
user = max # valid option
db = tothemax
/*
user = andors
db = toand
*/
user = max # valid option
db = tothemax
/*
user = andors
db = toand
*/
In this example the second options of user and db will be ignored. Please beware of the fact,
if the Module finds a B</*> string which is the start of a comment block, but no matching
@@ -2542,7 +2556,7 @@ end block, it will ignore the whole rest of the config file!
B<NOTE:> If you require the B<#> character (number sign) to remain in the option value, then
you can use a backslash in front of it, to escape it. Example:
bgcolor = \#ffffcc
bgcolor = \#ffffcc
In this example the value of $config{bgcolor} will be "#ffffcc", Config::General will not treat
the number sign as the begin of a comment because of the leading backslash.
@@ -2558,25 +2572,25 @@ and parsing.
The general aproach works like this:
sub ck {
my($file, $base) = @_;
print "_open() tries $file ... ";
if($file =~ /blah/) {
print "ignored\n";
return (0);
}
else {
print "allowed\n";
return (1, @_);
}
}
sub ck {
my($file, $base) = @_;
print "_open() tries $file ... ";
if($file =~ /blah/) {
print "ignored\n";
return (0);
}
else {
print "allowed\n";
return (1, @_);
}
}
my %c = ParseConfig(
-IncludeGlob => 1,
-UseApacheInclude => 1,
-ConfigFile => shift,
-Plug => { pre_open => *ck }
);
my %c = ParseConfig(
-IncludeGlob => 1,
-UseApacheInclude => 1,
-ConfigFile => shift,
-Plug => { pre_open => *ck }
);
Output:
@@ -2746,7 +2760,7 @@ I recommend you to read the following documents, which are supplied with Perl:
=head1 LICENSE AND COPYRIGHT
Copyright (c) 2000-2015 Thomas Linden
Copyright (c) 2000-2016 Thomas Linden
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.