bin/sync-combine-two-yaml-exports-into-one.pl

#!/usr/bin/perl
use warnings;
use strict;
no strict "refs";

use IO::All;
use Encode;
# only  YAML::XS  is able  to  properly  load  our data  (others  fail
# invariably)
use YAML::XS qw(Load);
# Only the pure-Perl YAML module is able to properly Dump the data.
# YAML::XS writes various kinds of multiline entries as strings like
# "line\nline2\nline3", which end up literally in the generated yaml,
# and note is then unable to process them. So the pure-Perl version is
# better, as it always generates multiline entries for data containing
# newlines.
use YAML qw(Dump);
use Data::Dumper;

my ($yf1, $yf2) = @ARGV;

# Both export files are mandatory. Without this check a missing argument
# only shows up later as an obscure failure while reading the file.
die "Usage: $0 <export1.yaml> <export2.yaml>\n"
    unless defined $yf1 and defined $yf2;

# Read both input files and parse the yaml into data structures. Before
# decoding, every run of bytes outside the ASCII range (0x00-0x7F) is
# removed from the raw content ("fix non-printables"), so non-ASCII
# characters do not survive into the combined output.
#
# A false return value from Load means no usable document was found. $!
# holds the last OS error, which says nothing about YAML parsing, so it
# is intentionally not part of the message.
my $badutf81 < io $yf1;
my $yaml1 = decode( 'UTF-8', $badutf81 =~ s/[^\x00-\x7F]+//gr );
my $y1 = Load $yaml1 or die "Could not load $yf1: no YAML data found\n";

my $badutf82 < io $yf2;
my $yaml2 = decode( 'UTF-8', $badutf82 =~ s/[^\x00-\x7F]+//gr );
my $y2 = Load $yaml2 or die "Could not load $yf2: no YAML data found\n";

# convert to comparable hashes with unique keys (path|title)
my $hash1 = hashify($y1);
my $hash2 = hashify($y2);

# diff and recombine the two into a new one, renumbered from 1
my $combinedhash = hash2note(diff($hash1, $hash2));

#print Dumper($combinedhash); exit;

# turn into yaml
my $combinedyaml = Dump($combinedhash);

# perl uses scalars as hash keys (read: strings) so the numeric top-level
# keys come out quoted; unquote them here to make note happy
$combinedyaml =~ s/^'(\d+)':/$1:/gm;


print $combinedyaml;


sub hash2note {
    # Convert the given hash into note format, i.e. a hash whose keys are
    # consecutive numbers starting at 1.
    #
    # Parameter: a hash reference with arbitrary keys.
    # Returns:   a new hash reference holding the same values; numbers are
    #            assigned in the string-sorted order of the original keys.
    #            For an empty input an empty hash reference is returned
    #            (never undef), so callers can always dereference it.
    my $hash = shift;
    my $new  = {};
    my $i    = 0;

    foreach my $path (sort keys %{$hash}) {
        $new->{++$i} = $hash->{$path};
    }

    return $new;
}

sub diff {
    # Diff the two hashes and create a new combined one.
    #
    # Parameters: two hash references keyed by path; each value is a hash
    #             reference with at least the fields 'title' and 'body'.
    # Returns:    a new hash reference containing every entry of both
    #             inputs; entries with the same key and the same body are
    #             stored once. Always a hash reference, even if both
    #             inputs are empty.
    # Side effects: entries are moved, not copied - processed keys are
    #             deleted from the hashes passed in. One progress line per
    #             entry is written to STDERR.
    my($hash1, $hash2) = @_;
    my $new = {};

    # iterate over hash1, store duplicates and remove them in hash2,
    # store different entries and remove in both,
    # store those missing in hash2 and delete them in hash1
    foreach my $path (sort keys %{$hash1}) {
        if (exists $hash2->{$path}) {
            if ($hash2->{$path}->{body} eq $hash1->{$path}->{body}) {
                printf STDERR "%s => %s is duplicate\n", $path, $hash1->{$path}->{title};
                $new->{$path} = delete $hash2->{$path};
                delete $hash1->{$path};
            }
            else {
                printf STDERR "%s => %s is different\n", $path, $hash1->{$path}->{title};
                $new->{$path} = delete $hash1->{$path};

                # The hash2 variant is stored next to the hash1 one under
                # the key with a "2" appended. That key may already belong
                # to a real entry (e.g. a title that ends in "2"), which
                # would be overwritten or overwrite this one later. Keep
                # appending "2" until the key is unused everywhere.
                my $altpath = $path . 2;
                while (exists $new->{$altpath}
                       or exists $hash1->{$altpath}
                       or exists $hash2->{$altpath}) {
                    $altpath .= 2;
                }
                $new->{$altpath} = delete $hash2->{$path};
            }
        }
        else {
            printf STDERR "%s => %s is missing in hash2\n", $path, $hash1->{$path}->{title};
            $new->{$path} = delete $hash1->{$path};
        }
    }

    # hash1 is empty at this point: every branch above deletes the key it
    # has just processed, so there are no leftovers of hash1 to store.

    # store any leftovers of hash2
    foreach my $path (sort keys %{$hash2}) {
        printf STDERR "%s => %s is left in hash2\n", $path, $hash2->{$path}->{title};
        $new->{$path} = $hash2->{$path};
    }

    return $new;
}

sub hashify {
    # Re-key the loaded data: instead of the ids used as keys in the
    # input, every entry is filed under "<path>|<title>" built from its
    # own 'path' and 'title' fields.
    #
    # Parameter: hash reference, id => entry (entry is a hash reference).
    # Returns:   hash reference, "<path>|<title>" => the same entry.
    # Dies:      if two entries produce the same key.
    my $data = shift;
    my %by_path;

    for my $id (keys %{$data}) {
        my $key = "$data->{$id}->{path}|$data->{$id}->{title}";

        # the combined key has to be unique, otherwise an entry would be
        # silently dropped
        die "$key already exists!\n" if exists $by_path{$key};

        $by_path{$key} = $data->{$id};
    }

    return \%by_path;
}
Added script to recombine 2 dumps into a new one." 2023-08-11 20:15:36 +02:00			`#!/usr/bin/perl`
			`use warnings;`
			`use strict;`
			`no strict "refs";`

			`use IO::All;`
			`use Encode;`
fixed-newlines 2023-08-12 20:24:18 +02:00			`# only YAML::XS is able to properly load our data (others fail`
			`# invariably)`
			`use YAML::XS qw(Load);`
			`# only YAML is able to properly Dump the data YAML::XS generates`
			`# various kinds of multiline entries like "line\nline2\nline3" end up`
			`# literally in the generated yaml, which note is then unable to`
			`# feed. So, the pure perl version is better as it always generates`
			`# multiline entries for data containing newlines`
			`use YAML qw(Dump);`
Added script to recombine 2 dumps into a new one." 2023-08-11 20:15:36 +02:00			`use Data::Dumper;`

			`my ($yf1, $yf2) = @ARGV;`

			`# read both input files and parse yaml into data structure, fix`
			`# non-printables`
			`my $badutf81 < io $yf1;`
			`my $yaml1 = decode( 'UTF-8', $badutf81 =~ s/[^\x00-\x7F]+//gr );`
			`my $y1 = Load $yaml1 or die "Could not load $yf1: $!";`

			`my $badutf82 < io $yf2;`
			`my $yaml2 = decode( 'UTF-8', $badutf82 =~ s/[^\x00-\x7F]+//gr );`
			`my $y2 = Load $yaml2 or die "Could not load $yf2: $!";`

			`# convert to comparable hashes with unique keys`
			`my $hash1 = &hashify($y1);`
			`my $hash2 = &hashify($y2);`

			`# diff and recombine the two into a new one`
			`my $combinedhash = &hash2note(&diff($hash1, $hash2));`

fixed-newlines 2023-08-12 20:24:18 +02:00			`#print Dumper($combinedhash); exit;`

Added script to recombine 2 dumps into a new one." 2023-08-11 20:15:36 +02:00			`# turn into yaml`
			`my $combindedyaml = Dump($combinedhash);`

			`# perl uses scalars as hash keys (read: strings) so we need to unquote`
			`# them here to make note happy`
			`$combindedyaml =~ s/^'(\d+)':/$1:/gm;`


			`print $combindedyaml;`


			`sub hash2note {`
			`# convert given hash into note format with number as key`
			`my $hash = shift;`
			`my $new;`
			`my $i = 0;`

			`foreach my $path (sort keys %{$hash}) {`
			`$new->{++$i} = $hash->{$path};`
			`}`

			`return $new;`
			`}`

			`sub diff {`
			`# diff the two hashes, create a new combined one`
			`my($hash1, $hash2) = @_;`
			`my $new;`

			`# iterate over hash1, store duplicates and remove them in hash2,`
			`# store different entries and remove in both,`
			`# store those missing in hash2 and delete them in hash1`
			`foreach my $path (sort keys %{$hash1}) {`
			`if (exists $hash2->{$path}) {`
			`if ($hash2->{$path}->{body} eq $hash1->{$path}->{body}) {`
			`printf STDERR "%s => %s is duplicate\n", $path, $hash1->{$path}->{title};`
			`$new->{$path} = delete $hash2->{$path};`
			`delete $hash1->{$path};`
			`}`
			`else {`
			`printf STDERR "%s => %s is different\n", $path, $hash1->{$path}->{title};`
			`$new->{$path} = delete $hash1->{$path};`
			`$new->{$path . 2} = delete $hash2->{$path};`
			`}`
			`}`
			`else {`
			`printf STDERR "%s => %s is missing in hash2\n", $path, $hash1->{$path}->{title};`
			`$new->{$path} = delete $hash1->{$path};`
			`}`
			`}`

			`# store any lefovers of hash1`
			`foreach my $path (sort keys %{$hash1}) {`
			`printf STDERR "%s => %s is left in hash1\n", $path, $hash1->{$path}->{title};`
			`$new->{$path} = $hash1->{$path};`
			`}`

			`# store any lefovers of hash2`
			`foreach my $path (sort keys %{$hash2}) {`
			`printf STDERR "%s => %s is left in hash2\n", $path, $hash2->{$path}->{title};`
			`$new->{$path} = $hash2->{$path};`
			`}`

			`return $new`
			`}`

			`sub hashify {`
			`# create new hash with path+title as key instead of id's`
			`my $data = shift;`
			`my $new = {};`

			`foreach my $id (keys %{$data} ) {`
			`my $path = $data->{$id}->{path} . '\|' . $data->{$id}->{title};`
			`if (exists $new->{$path}) {`
			`die "$path already exists!\n";`
			`}`
			`else {`
			`$new->{$path} = $data->{$id};`
			`}`
			`}`

			`return $new;`
			`}`