From: gregor herrmann Date: Sun, 29 Nov 2015 14:04:25 +0000 (+0100) Subject: Imported Upstream version 3.008 X-Git-Tag: archive/raspbian/4.017+ds-1+rpi1~1^2~3^2~10 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=0b784a6d5ca047063ff50de0ee389b38a6bbd80b;p=libsereal-encoder-perl.git Imported Upstream version 3.008 --- diff --git a/Changes b/Changes index 4c81c17..0d32c09 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,48 @@ Revision history for Perl extension Sereal-Encoder * of the decoder before upgrading to version 3 of the * * encoder! * **************************************************************** +3.008 Nov 27 2015 + * FIX: Make sure that reserializing a data structure created + using alias_varint_under does not produce a corrupted dataset. + Thanks to Iskra for the report. + * FIX: Precedence issue related to sort keys options. Thanks + to Petr Písař for the report and fix. + * FIX: Win32 build issues, thanks to bulk88 for help. + +3.007 Nov 26 2015 + * FIX: sorting should now work with tied hashes, and + be in general faster. + * CHANGED: Sort order for sort_keys=1 is now defined to be + "in order by length of bytes, then by byte order of the + underlying string, then by utd8ness, with non-utf8 first". + This sort order was chosen because it requires the least + operations to perform in the most cases, has a bounded number + of cases where we would have to create any temporary SV's or + to do operations that require us to introspect codepoints in + utf8 strings. IOW purely performance. Note that so long + as you compare data created with the same sort_keys setting + you will ALWAYS get the same order *in process*, regardless + of what you choose. Backwards compatible support for the + old order is available via sort_keys=3. + + ADDED: sort_keys=2 to provide "perl cmp order" + (Its just there because it is easy to do, not for any + good reason.) + ADDED: sort_keys=3 to provide "rev perl cmp order" + + Legacy sort order was equivalent to sort_keys=3, use this + option *ONLY* if you are extremely sensitive to changes in the + sorted order (or "canonical form"). + + * [DECODER] Build fixes for boxes without any Sereal installed (it seems + to be common on certain types of smokers, and not on others). + * Fix issues with serializing blessed scalars where we see the + blessed scalar before we see the reference to it. + * Handle PVLV undefs + * Build fixes for 5.8.9 + * Doc patches + * Optimizations + 3.006 Nov 14 2015 * [DECODER] fix segfaults from heavily corrupted data. Guards against various pathological cases which could cause segfaults diff --git a/Encoder.xs b/Encoder.xs index 4c54de8..b7d8e5a 100644 --- a/Encoder.xs +++ b/Encoder.xs @@ -5,7 +5,7 @@ #include "perl.h" #include "XSUB.h" -#define NEED_newSV_type +#define NEED_newSV_type_GLOBAL #include "ppport.h" #include "srl_encoder.h" @@ -207,6 +207,12 @@ DESTROY(enc) CODE: srl_destroy_encoder(aTHX_ enc); +U32 +flags(enc) + srl_encoder_t *enc; + CODE: + RETVAL = enc->flags; + OUTPUT: RETVAL void encode_sereal(src, opt = NULL) @@ -278,3 +284,5 @@ test() } PTABLE_iter_free(iter); PTABLE_free(tbl); + + diff --git a/MANIFEST b/MANIFEST index e16aeda..e05928f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5,6 +5,7 @@ author_tools/freeze_thaw_timing.pl author_tools/hobodecoder.pl author_tools/numeric_str_length.c author_tools/stringify_test.c +author_tools/update_flag_consts.pl author_tools/update_from_header.pl author_tools/valgrind.supp Changes @@ -20,6 +21,7 @@ miniz.c miniz.h ppport.h ptable.h +qsort.h snappy/csnappy.h snappy/csnappy_compat.h snappy/csnappy_compress.c @@ -54,6 +56,7 @@ t/020_sort_keys.t t/021_sort_keys_option.t t/022_canonical_refs.t t/030_canonical_vs_test_deep.t +t/040_tied_hash.t t/110_nobless.t t/120_hdr_data.t t/130_freezethaw.t @@ -86,6 +89,8 @@ t/700_roundtrip/v3/snappy_canon.t t/700_roundtrip/v3/snappy_incr.t t/700_roundtrip/v3/snappy_incr_canon.t t/700_roundtrip/v3/sort_keys.t +t/700_roundtrip/v3/sort_keys_perl.t +t/700_roundtrip/v3/sort_keys_perl_rev.t t/700_roundtrip/v3/zlib.t t/700_roundtrip/v3/zlib_force.t t/800_threads.t diff --git a/META.json b/META.json index f300ee5..1edb16b 100644 --- a/META.json +++ b/META.json @@ -4,7 +4,7 @@ "Steffen Mueller , Yves Orton " ], "dynamic_config" : 1, - "generated_by" : "ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001", + "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380", "license" : [ "perl_5" ], @@ -19,37 +19,6 @@ "inc" ] }, - "prereqs" : { - "build" : { - "requires" : { - "ExtUtils::ParseXS" : "2.21", - "File::Find" : "0", - "File::Path" : "0" - } - }, - "configure" : { - "requires" : { - "ExtUtils::MakeMaker" : "7.0" - } - }, - "runtime" : { - "requires" : { - "XSLoader" : "0", - "perl" : "5.008" - } - }, - "test" : { - "requires" : { - "Data::Dumper" : "0", - "File::Spec" : "0", - "Scalar::Util" : "0", - "Sereal::Decoder" : "3.00", - "Test::LongString" : "0", - "Test::More" : "0.88", - "Test::Warn" : "0" - } - } - }, "release_status" : "stable", "resources" : { "bugtracker" : { @@ -60,5 +29,5 @@ "url" : "git://github.com/Sereal/Sereal.git" } }, - "version" : "3.006" + "version" : "3.008" } diff --git a/META.yml b/META.yml index 851d595..7c0ec83 100644 --- a/META.yml +++ b/META.yml @@ -2,34 +2,19 @@ abstract: 'Fast, compact, powerful binary serialization' author: - 'Steffen Mueller , Yves Orton ' -build_requires: - Data::Dumper: '0' - ExtUtils::ParseXS: '2.21' - File::Find: '0' - File::Path: '0' - File::Spec: '0' - Scalar::Util: '0' - Sereal::Decoder: '3.00' - Test::LongString: '0' - Test::More: '0.88' - Test::Warn: '0' -configure_requires: - ExtUtils::MakeMaker: '7.0' +build_requires: {} dynamic_config: 1 -generated_by: 'ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001' +generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: '1.4' + version: 1.4 name: Sereal-Encoder no_index: directory: - t - inc -requires: - XSLoader: '0' - perl: '5.008' resources: bugtracker: https://github.com/Sereal/Sereal/issues repository: git://github.com/Sereal/Sereal.git -version: '3.006' +version: 3.008 diff --git a/author_tools/hobodecoder.pl b/author_tools/hobodecoder.pl index a1f920d..b29e692 100644 --- a/author_tools/hobodecoder.pl +++ b/author_tools/hobodecoder.pl @@ -4,6 +4,7 @@ use warnings; use Data::Dumper; use Getopt::Long qw(GetOptions); +use Encode qw(encode_utf8 decode_utf8); our @constants; no warnings 'recursion'; BEGIN { @@ -156,13 +157,14 @@ sub parse_sv { my $len = $o; my $str = _chop_data_prefix( $len ); $done .= $str; - printf "SHORT_BINARY(%u): '%s'\n", $len, $str; + printf "SHORT_BINARY(%u): '%s' (%s)\n", $len, encode_utf8($str), unpack("H*", $str); } elsif ($o == SRL_HDR_BINARY || $o == SRL_HDR_STR_UTF8) { my $l = varint(); my $str = _chop_data_prefix( $l ); # fixme UTF8 $done .= $str; - printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $l, $str); + $str= decode_utf8($str) if $o == SRL_HDR_STR_UTF8; + printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s' (%s)\n", $l, encode_utf8($str), unpack("H*", encode_utf8($str))); } elsif ($o == SRL_HDR_FLOAT) { printf "FLOAT(%f)\n", parse_float(); diff --git a/author_tools/update_flag_consts.pl b/author_tools/update_flag_consts.pl new file mode 100644 index 0000000..efe39fe --- /dev/null +++ b/author_tools/update_flag_consts.pl @@ -0,0 +1,69 @@ +use strict; +use warnings; +use Data::Dumper; + +my %sets; +my %flag_consts; +my %flag_names; +my (@flags, @static, @volatile); +my $file= "srl_encoder.h"; +{ + open my $fh, "<", $file + or die "Failed to open '$file' for read: $!"; + + while (<$fh>) { + if ( m/#define (SRL_F_(\w+))\s+(.*)/ || /(\w+(VOLATILE_FLAGS))\s+(.*)/ ) { + #print; + my $full_name= $1; + my $name= $2; + my $value= $3; + $name=~s/_?ENCODER_?//g; + $flag_names{$full_name}= $name; + if ($value=~s/UL\z//) { + $flag_consts{$full_name}= 0+eval $value; + } else { + $value =~ s/(SRL_F_\w+)/\$flag_consts{$1}/g; + $sets{"SRL_F_ENCODER_" . $name}= 0+eval $value; + } + } + } + foreach my $key ( sort { $flag_consts{$a} <=> $flag_consts{$b} } keys %flag_consts ) { + if (defined $sets{SRL_F_ENCODER_VOLATILE_FLAGS}) { + my $is_volatile= $flag_consts{$key} & $sets{SRL_F_ENCODER_VOLATILE_FLAGS}; + push @static, $is_volatile ? undef : $flag_names{$key}; + push @volatile, $is_volatile ? $flag_names{$key} : undef; + } + push @flags, $flag_names{$key}; + } +} + +my %consts= (%sets, %flag_consts); +$consts{_FLAG_NAME}= \@flags; +if (@volatile) { + $consts{_FLAG_NAME_VOLATILE}= \@volatile; + $consts{_FLAG_NAME_STATIC}= \@static; +} + +my $infile= "lib/Sereal/Encoder.pm"; +my $outfile= "$infile.new"; +open my $fh,"<", $infile + or die "Failed to read '$infile': $!"; +open my $ofh, ">", $outfile + or die "Failed to write to '$outfile': $!"; +while (<$fh>) { + if (/#begin generated/) { + print $ofh $_; + my $s= Data::Dumper->new([\%consts])->Sortkeys(1)->Terse(1)->Dump(); + chop($s); + $s.="; #end generated\n"; + print $ofh $s; + } + unless(/#begin generated/ ... /#end generated/) { + print $ofh $_; + } +} +close $ofh; +close $fh; +rename $infile, "$infile.bak" or die "Failed to rename '$infile' to '$infile.bak': $!"; +rename $outfile, $infile or die "Failed to rename '$outfile' to '$infile': $!"; + diff --git a/inc/Sereal/BuildTools.pm b/inc/Sereal/BuildTools.pm index 7310fb0..d4e89d7 100644 --- a/inc/Sereal/BuildTools.pm +++ b/inc/Sereal/BuildTools.pm @@ -3,10 +3,14 @@ use strict; use warnings; use Config; +use constant OSNAME => $^O; sub link_files { my $shared_dir = shift; - my $exlude_tests = shift; + my $do_tests = shift || ""; + my $exclude_tests= $do_tests eq "without_tests"; + my $tests_only= $do_tests eq "tests_only"; + # This fires from a git source tree only. # Right now, all devs are on Linux. Feel free to make portable. eval { @@ -20,7 +24,8 @@ sub link_files { my $f = $_; s/^\Q$shared_dir\E\/?// or die $_; return unless $_; - return if $exlude_tests && m#^/?t/#; + return if $exclude_tests && m#^/?t/#; + return if $tests_only && !m#^/?t/#; if (-d $f) { File::Path::mkpath($_) @@ -32,7 +37,14 @@ sub link_files { my $ref = join "/", ("..") x scalar(@d); my $subd = join "/", @d; chdir $subd if length($ref); - symlink(join("/", grep length, $ref, $shared_dir, $subd, $fname), $fname); + my $srcfname = join("/", grep length, $ref, $shared_dir, $subd, $fname); + if (OSNAME eq 'MSWin32') { + die "link($srcfname, $fname) failed: $!" + unless link($srcfname, $fname); #only NTFS implements it + } + else { + symlink($srcfname, $fname); + } chdir($ref) if length($ref); } }, diff --git a/lib/Sereal/Encoder.pm b/lib/Sereal/Encoder.pm index 5c1f1d7..431a131 100644 --- a/lib/Sereal/Encoder.pm +++ b/lib/Sereal/Encoder.pm @@ -5,7 +5,7 @@ use warnings; use Carp qw/croak/; use XSLoader; -our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed decoders! +our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed decoders! our $XS_VERSION = $VERSION; $VERSION= eval $VERSION; # not for public consumption, just for testing. @@ -16,9 +16,52 @@ sub _test_compat {return(@$TestCompat, $VERSION)} # Make sure to keep these constants in sync with the C code in srl_encoder.c. # I know they could be exported from C using things like ExtUtils::Constant, # but that's too much of a hassle for just three numbers. -use constant SRL_UNCOMPRESSED => 0; -use constant SRL_SNAPPY => 1; -use constant SRL_ZLIB => 2; +use constant { + SRL_UNCOMPRESSED => 0, + SRL_SNAPPY => 1, + SRL_ZLIB => 2, +}; +use constant #begin generated +{ + 'SRL_F_ALIASED_DEDUPE_STRINGS' => 4096, + 'SRL_F_CANONICAL_REFS' => 32768, + 'SRL_F_COMPRESS_SNAPPY' => 64, + 'SRL_F_COMPRESS_SNAPPY_INCREMENTAL' => 128, + 'SRL_F_COMPRESS_ZLIB' => 256, + 'SRL_F_CROAK_ON_BLESS' => 4, + 'SRL_F_DEDUPE_STRINGS' => 2048, + 'SRL_F_ENABLE_FREEZE_SUPPORT' => 16384, + 'SRL_F_NOWARN_UNKNOWN_OVERLOAD' => 512, + 'SRL_F_NO_BLESS_OBJECTS' => 8192, + 'SRL_F_REUSE_ENCODER' => 2, + 'SRL_F_SHARED_HASHKEYS' => 1, + 'SRL_F_SORT_KEYS' => 1024, + 'SRL_F_SORT_KEYS_PERL' => 65536, + 'SRL_F_SORT_KEYS_PERL_REV' => 131072, + 'SRL_F_STRINGIFY_UNKNOWN' => 16, + 'SRL_F_UNDEF_UNKNOWN' => 8, + 'SRL_F_WARN_UNKNOWN' => 32, + '_FLAG_NAME' => [ + 'SHARED_HASHKEYS', + 'REUSE', + 'CROAK_ON_BLESS', + 'UNDEF_UNKNOWN', + 'STRINGIFY_UNKNOWN', + 'WARN_UNKNOWN', + 'COMPRESS_SNAPPY', + 'COMPRESS_SNAPPY_INCREMENTAL', + 'COMPRESS_ZLIB', + 'NOWARN_UNKNOWN_OVERLOAD', + 'SORT_KEYS', + 'DEDUPE_STRINGS', + 'ALIASED_DEDUPE_STRINGS', + 'NO_BLESS_OBJECTS', + 'ENABLE_FREEZE_SUPPORT', + 'CANONICAL_REFS', + 'SORT_KEYS_PERL', + 'SORT_KEYS_PERL_REV' + ] +}; #end generated use Exporter 'import'; our @EXPORT_OK = qw( @@ -37,6 +80,29 @@ sub CLONE_SKIP {1} XSLoader::load('Sereal::Encoder', $XS_VERSION); +sub encode_to_file { + my ($self, $file, $struct, $append)= @_; + my $mode= $append ? ">>" : ">"; + open my $fh, $mode, $file + or die "Failed to open '$file' for " . ($append ? "append" : "write") . ": $!"; + print $fh $self->encode($struct) + or die "Failed to print to '$file': $!"; + close $fh + or die "Failed to close '$file': $!"; +} + +my $flags= sub { + my ($int, $ary)= @_; + return map { + ($ary->[$_] and $int & (1 << $_)) ? $ary->[$_] : () + } (0..$#$ary); +}; + +sub flag_names { + my ($self, $val)= @_; + return $flags->($val // $self->flags, _FLAG_NAME); +} + 1; __END__ @@ -275,11 +341,23 @@ variables on use, and some of its rules are a little arcane (for instance utf8 keys), and so two hashes that might appear to be the same might still produce different output as far as Sereal is concerned. -The thusly allocated encoder object and its output buffer will be reused -between invocations of C, so hold on to it for an efficiency -gain if you plan to serialize multiple similar data structures, but destroy -it if you serialize a single very large data structure just once to free -the memory. +As of 3.006_007 (prerelease candidate for 3.007) the sort order has been changed +to the following: order by length of keys (in bytes) ascending, then by byte +order of the raw underlying string, then by utf8ness, with non-utf8 first. This +order was chosen because it is the most efficient to implement, both in terms +of memory and time. This sort order is enabled when sort_keys is set to 1. + +You may also produce output in Perl "cmp" order, by setting sort_keys to 2. +And for backwards compatibility you may also produce output in reverse Perl +"cmp" order by setting sort_keys to 3. Prior to 3.006_007 this was the +only sort order possible, although it was not explicitly defined what it was. + +Note that comparatively speaking both of the "cmp" sort orders are slow and +memory inefficient. Unless you have a really good reason stick to the default +which is fast and as lean as possible. + +Unless you are concerned with "cross process canonical representation" then +it doesn't matter what option you choose. See L for why you might want to use this, and for the various caveats involved. @@ -736,7 +814,7 @@ Sereal currently will choose the *string* value when it detects these items. It is possible that a future release of the protocol will fix these issues. -=back 4 +=back =head1 BUGS, CONTACT AND SUPPORT diff --git a/lib/Sereal/Encoder/Constants.pm b/lib/Sereal/Encoder/Constants.pm index 5394539..bd33273 100644 --- a/lib/Sereal/Encoder/Constants.pm +++ b/lib/Sereal/Encoder/Constants.pm @@ -4,7 +4,7 @@ use warnings; require Exporter; our @ISA= qw(Exporter); -our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed encoders! +our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed encoders! our (@EXPORT_OK, %DEFINE, %TAG_INFO_HASH, @TAG_INFO_ARRAY); diff --git a/qsort.h b/qsort.h new file mode 100644 index 0000000..07480d5 --- /dev/null +++ b/qsort.h @@ -0,0 +1,290 @@ +/* $Id: qsort.h,v 1.5 2008-01-28 18:16:49 mjt Exp $ + * Adopted from GNU glibc by Mjt. + * See stdlib/qsort.c in glibc */ + +/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Douglas C. Schmidt (schmidt@ics.uci.edu). + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +/* in-line qsort implementation. Differs from traditional qsort() routine + * in that it is a macro, not a function, and instead of passing an address + * of a comparison routine to the function, it is possible to inline + * comparison routine, thus speeding up sorting a lot. + * + * Usage: + * #include "iqsort.h" + * #define islt(a,b) (strcmp((*a),(*b))<0) + * char *arr[]; + * int n; + * QSORT(char*, arr, n, islt); + * + * The "prototype" and 4 arguments are: + * QSORT(TYPE,BASE,NELT,ISLT) + * 1) type of each element, TYPE, + * 2) address of the beginning of the array, of type TYPE*, + * 3) number of elements in the array, and + * 4) comparision routine. + * Array pointer and number of elements are referenced only once. + * This is similar to a call + * qsort(BASE,NELT,sizeof(TYPE),ISLT) + * with the difference in last parameter. + * Note the islt macro/routine (it receives pointers to two elements): + * the only condition of interest is whenever one element is less than + * another, no other conditions (greather than, equal to etc) are tested. + * So, for example, to define integer sort, use: + * #define islt(a,b) ((*a)<(*b)) + * QSORT(int, arr, n, islt) + * + * The macro could be used to implement a sorting function (see examples + * below), or to implement the sorting algorithm inline. That is, either + * create a sorting function and use it whenever you want to sort something, + * or use QSORT() macro directly instead a call to such routine. Note that + * the macro expands to quite some code (compiled size of int qsort on x86 + * is about 700..800 bytes). + * + * Using this macro directly it isn't possible to implement traditional + * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE), + * while qsort() allows element size to be different. + * + * Several ready-to-use examples: + * + * Sorting array of integers: + * void int_qsort(int *arr, unsigned n) { + * #define int_lt(a,b) ((*a)<(*b)) + * QSORT(int, arr, n, int_lt); + * } + * + * Sorting array of string pointers: + * void str_qsort(char *arr[], unsigned n) { + * #define str_lt(a,b) (strcmp((*a),(*b)) < 0) + * QSORT(char*, arr, n, str_lt); + * } + * + * Sorting array of structures: + * + * struct elt { + * int key; + * ... + * }; + * void elt_qsort(struct elt *arr, unsigned n) { + * #define elt_lt(a,b) ((a)->key < (b)->key) + * QSORT(struct elt, arr, n, elt_lt); + * } + * + * And so on. + */ + +/* Swap two items pointed to by A and B using temporary buffer t. */ +#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t))) + +/* Discontinue quicksort algorithm when partition gets below this size. + This particular magic number was chosen to work best on a Sun 4/260. */ +#define _QSORT_MAX_THRESH 4 + +/* Stack node declarations used to store unfulfilled partition obligations + * (inlined in QSORT). +typedef struct { + QSORT_TYPE *_lo, *_hi; +} qsort_stack_node; + */ + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH)). Since total_elements has type unsigned, we get as + upper bound for log (total_elements): + bits per byte (CHAR_BIT) * sizeof(unsigned). */ +#define _QSORT_STACK_SIZE (8 * sizeof(unsigned)) +#define _QSORT_PUSH(top, low, high) \ + (((top->_lo = (low)), (top->_hi = (high)), ++top)) +#define _QSORT_POP(low, high, top) \ + ((--top, (low = top->_lo), (high = top->_hi))) +#define _QSORT_STACK_NOT_EMPTY (_stack < _top) + + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving + insertion sort to order the MAX_THRESH items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + +/* The main code starts here... */ +#define QSORT(QSORT_TYPE,QSORT_BASE,QSORT_NELT,QSORT_LT) \ +{ \ + QSORT_TYPE *const _base = (QSORT_BASE); \ + const unsigned _elems = (QSORT_NELT); \ + QSORT_TYPE _hold; \ + \ + /* Don't declare two variables of type QSORT_TYPE in a single \ + * statement: eg `TYPE a, b;', in case if TYPE is a pointer, \ + * expands to `type* a, b;' wich isn't what we want. \ + */ \ + \ + if (_elems > _QSORT_MAX_THRESH) { \ + QSORT_TYPE *_lo = _base; \ + QSORT_TYPE *_hi = _lo + _elems - 1; \ + struct { \ + QSORT_TYPE *_hi; QSORT_TYPE *_lo; \ + } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1; \ + \ + while (_QSORT_STACK_NOT_EMPTY) { \ + QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr; \ + \ + /* Select median value from among LO, MID, and HI. Rearrange \ + LO and HI so the three values are sorted. This lowers the \ + probability of picking a pathological pivot value and \ + skips a comparison for both the LEFT_PTR and RIGHT_PTR in \ + the while loops. */ \ + \ + QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \ + \ + if (QSORT_LT (_mid, _lo)) \ + _QSORT_SWAP (_mid, _lo, _hold); \ + if (QSORT_LT (_hi, _mid)) { \ + _QSORT_SWAP (_mid, _hi, _hold); \ + if (QSORT_LT (_mid, _lo)) \ + _QSORT_SWAP (_mid, _lo, _hold); \ + } \ + \ + _left_ptr = _lo + 1; \ + _right_ptr = _hi - 1; \ + \ + /* Here's the famous ``collapse the walls'' section of quicksort. \ + Gotta like those tight inner loops! They are the main reason \ + that this algorithm runs much faster than others. */ \ + do { \ + while (QSORT_LT (_left_ptr, _mid)) \ + ++_left_ptr; \ + \ + while (QSORT_LT (_mid, _right_ptr)) \ + --_right_ptr; \ + \ + if (_left_ptr < _right_ptr) { \ + _QSORT_SWAP (_left_ptr, _right_ptr, _hold); \ + if (_mid == _left_ptr) \ + _mid = _right_ptr; \ + else if (_mid == _right_ptr) \ + _mid = _left_ptr; \ + ++_left_ptr; \ + --_right_ptr; \ + } \ + else if (_left_ptr == _right_ptr) { \ + ++_left_ptr; \ + --_right_ptr; \ + break; \ + } \ + } while (_left_ptr <= _right_ptr); \ + \ + /* Set up pointers for next iteration. First determine whether \ + left and right partitions are below the threshold size. If so, \ + ignore one or both. Otherwise, push the larger partition's \ + bounds on the stack and continue sorting the smaller one. */ \ + \ + if (_right_ptr - _lo <= _QSORT_MAX_THRESH) { \ + if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \ + /* Ignore both small partitions. */ \ + _QSORT_POP (_lo, _hi, _top); \ + else \ + /* Ignore small left partition. */ \ + _lo = _left_ptr; \ + } \ + else if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \ + /* Ignore small right partition. */ \ + _hi = _right_ptr; \ + else if (_right_ptr - _lo > _hi - _left_ptr) { \ + /* Push larger left partition indices. */ \ + _QSORT_PUSH (_top, _lo, _right_ptr); \ + _lo = _left_ptr; \ + } \ + else { \ + /* Push larger right partition indices. */ \ + _QSORT_PUSH (_top, _left_ptr, _hi); \ + _hi = _right_ptr; \ + } \ + } \ + } \ + \ + /* Once the BASE array is partially sorted by quicksort the rest \ + is completely sorted using insertion sort, since this is efficient \ + for partitions below MAX_THRESH size. BASE points to the \ + beginning of the array to sort, and END_PTR points at the very \ + last element in the array (*not* one beyond it!). */ \ + \ + { \ + QSORT_TYPE *const _end_ptr = _base + _elems - 1; \ + QSORT_TYPE *_tmp_ptr = _base; \ + register QSORT_TYPE *_run_ptr; \ + QSORT_TYPE *_thresh; \ + \ + _thresh = _base + _QSORT_MAX_THRESH; \ + if (_thresh > _end_ptr) \ + _thresh = _end_ptr; \ + \ + /* Find smallest element in first threshold and place it at the \ + array's beginning. This is the smallest array element, \ + and the operation speeds up insertion sort's inner loop. */ \ + \ + for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \ + if (QSORT_LT (_run_ptr, _tmp_ptr)) \ + _tmp_ptr = _run_ptr; \ + \ + if (_tmp_ptr != _base) \ + _QSORT_SWAP (_tmp_ptr, _base, _hold); \ + \ + /* Insertion sort, running from left-hand-side \ + * up to right-hand-side. */ \ + \ + _run_ptr = _base + 1; \ + while (++_run_ptr <= _end_ptr) { \ + _tmp_ptr = _run_ptr - 1; \ + while (QSORT_LT (_run_ptr, _tmp_ptr)) \ + --_tmp_ptr; \ + \ + ++_tmp_ptr; \ + if (_tmp_ptr != _run_ptr) { \ + QSORT_TYPE *_trav = _run_ptr + 1; \ + while (--_trav >= _run_ptr) { \ + QSORT_TYPE *_hi; QSORT_TYPE *_lo; \ + _hold = *_trav; \ + \ + for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) \ + *_hi = *_lo; \ + *_hi = _hold; \ + } \ + } \ + } \ + } \ + \ +} diff --git a/srl_common.h b/srl_common.h index d51d309..97eb0c3 100644 --- a/srl_common.h +++ b/srl_common.h @@ -59,9 +59,9 @@ * http://en.wikipedia.org/wiki/FLAGS_register_(computing) */ #ifdef SRL_X86_OR_X64_CPU -# if __x86_64__ || __x86_64 +# if defined(__x86_64__) || defined(__x86_64) # define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%rsp)\npopf") -# elif __i386__ || __i386 +# elif defined(__i386__) || defined(__i386) # define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%esp)\npopf") # endif #else diff --git a/srl_encoder.c b/srl_encoder.c index fb1499a..f6ac87a 100644 --- a/srl_encoder.c +++ b/srl_encoder.c @@ -52,6 +52,7 @@ extern "C" { #include "ptable.h" #include "srl_buffer.h" #include "srl_compress.h" +#include "qsort.h" /* The ENABLE_DANGEROUS_HACKS (passed through from ENV via Makefile.PL) enables * optimizations that may make the code so cozy with a particular version of the @@ -91,7 +92,7 @@ SRL_STATIC_INLINE void srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcnt SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys); SRL_STATIC_INLINE void srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src); SRL_STATIC_INLINE void srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src); -SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement); +SRL_STATIC_INLINE int srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement); SRL_STATIC_INLINE SV *srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent); SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc); SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc); @@ -123,6 +124,8 @@ SRL_STATIC_INLINE srl_encoder_t *srl_dump_data_structure(pTHX_ srl_encoder_t *en ? srl_init_weak_hash(enc) \ : (enc)->weak_seenhash ) +#define SRL_GET_WEAK_SEENHASH_OR_NULL(enc) ((enc)->weak_seenhash) + #define SRL_GET_FREEZEOBJ_SVHASH(enc) ( (enc)->freezeobj_svhash == NULL \ ? srl_init_freezeobj_svhash(enc) \ : (enc)->freezeobj_svhash ) @@ -390,7 +393,7 @@ SRL_STATIC_INLINE srl_encoder_t * srl_empty_encoder_struct(pTHX) { srl_encoder_t *enc; - Newx(enc, 1, srl_encoder_t); + Newxz(enc, 1, srl_encoder_t); if (enc == NULL) croak("Out of memory"); @@ -400,25 +403,8 @@ srl_empty_encoder_struct(pTHX) croak("Out of memory"); } - /* Set the tmp buffer struct's char buffer to NULL so we don't free - * something nasty if it's unused. */ - enc->tmp_buf.start = NULL; - enc->protocol_version = SRL_PROTOCOL_VERSION; - enc->recursion_depth = 0; enc->max_recursion_depth = DEFAULT_MAX_RECUR_DEPTH; - enc->operational_flags = 0; - /*enc->flags = 0;*/ /* to be set elsewhere */ - - enc->weak_seenhash = NULL; - enc->str_seenhash = NULL; - enc->ref_seenhash = NULL; - enc->snappy_workmem = NULL; - enc->string_deduper_hv = NULL; - - enc->freezeobj_svhash = NULL; - enc->sereal_string_sv = NULL; - enc->scratch_sv = NULL; return enc; } @@ -548,8 +534,15 @@ srl_build_encoder_struct(pTHX_ HV *opt, sv_with_hash *options) my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_SORT_KEYS); if ( !val ) my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL); - if ( val && SvTRUE(val) ) + if ( val && SvTRUE(val) ) { SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS); + if (SvIV(val) > 1) { + SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL); + if (SvIV(val) > 2) { + SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV); + } + } + } my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL_REFS); if ( !val ) @@ -878,8 +871,11 @@ srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent) } /* Outputs a bless header and the class name (as some form of string or COPY). - * Caller then has to output the actual reference payload. */ -SRL_STATIC_INLINE void + * Caller then has to output the actual reference payload. + * If it returns 1 it means the classname was written out and should NOT + * be overwritten by the ref rewrite logic (which handles REFP). + * If it returns 0 it means no classname was output. */ +SRL_STATIC_INLINE int srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement) { /* Check that we actually want to support objects */ @@ -888,11 +884,25 @@ srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement) "using Sereal::Encoder was explicitly disabled using the " "'croak_on_bless' option."); } else if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS) )) { - return; + return 0; } else { const HV *stash = SvSTASH(referent); PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc); - const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash); + svtype svt= SvTYPE(referent); + int is_av_or_hv= (svt == SVt_PVAV || svt== SVt_PVHV); + ptrdiff_t oldoffset= is_av_or_hv + ? 0 + : (ptrdiff_t)PTABLE_fetch(string_seenhash, referent); + + if (oldoffset) { + return 0; + } else { + svt= replacement ? SvTYPE(replacement) : SvTYPE(referent); + if (SRL_UNSUPPORTED_SvTYPE(svt)) { + return 0; + } + oldoffset= (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash); + } if (oldoffset != 0) { /* Issue COPY instead of literal class name string */ @@ -923,7 +933,15 @@ srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement) srl_dump_pv(aTHX_ enc, class_name, len, 0); #endif } + if (is_av_or_hv) { + return 0; + } else { + /* use the string_seenhash to track which items we have seen before */ + PTABLE_store(string_seenhash, (void *)referent, INT2PTR(void *, BODY_POS_OFS(&enc->buf))); + return 1; + } } + return 0; } @@ -1027,25 +1045,30 @@ srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *user_he SRL_STATIC_INLINE void srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc) { - PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH(enc); - PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash); - PTABLE_ENTRY_t *ent; - - /* we now walk the weak_seenhash and set any tags it points - * at to the PAD opcode, this basically turns the first weakref - * we encountered into a normal ref when there is only a weakref - * pointing at the structure. */ - while ( NULL != (ent = PTABLE_iter_next(it)) ) { - const ptrdiff_t offset = (ptrdiff_t)ent->value; - if ( offset ) { - srl_buffer_char *pos = enc->buf.body_pos + offset; - assert(*pos == SRL_HDR_WEAKEN); - if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset); - *pos = SRL_HDR_PAD; + PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH_OR_NULL(enc); + if (!weak_seenhash) + return; + + { + PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash); + PTABLE_ENTRY_t *ent; + + /* we now walk the weak_seenhash and set any tags it points + * at to the PAD opcode, this basically turns the first weakref + * we encountered into a normal ref when there is only a weakref + * pointing at the structure. */ + while ( NULL != (ent = PTABLE_iter_next(it)) ) { + const ptrdiff_t offset = (ptrdiff_t)ent->value; + if ( offset ) { + srl_buffer_char *pos = enc->buf.body_pos + offset; + assert(*pos == SRL_HDR_WEAKEN); + if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset); + *pos = SRL_HDR_PAD; + } } - } - PTABLE_iter_free(it); + PTABLE_iter_free(it); + } } @@ -1141,145 +1164,290 @@ srl_dump_av(pTHX_ srl_encoder_t *enc, AV *src, U32 refcount) } } -/* compare hash entries, used when all keys are bytestrings */ -static int -he_cmp_fast(const void *a_, const void *b_) +SRL_STATIC_INLINE void +srl_dump_hv_unsorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, UV n) { - /* even though we are called as a callback from qsort there is - * no need for a dTHX here, we don't use anything that needs it */ - int cmp; + HE *he; + const int do_share_keys = HvSHAREKEYS((SV *)src); + HE **he_ptr= HvARRAY(src); + HE **he_end= he_ptr + HvMAX(src) + 1; - HE *a = *(HE **)a_; - HE *b = *(HE **)b_; + do { + for (he= *he_ptr++; he; he= HeNEXT(he) ) { + SV *v= HeVAL(he); + if (v != &PL_sv_placeholder) { + srl_dump_hk(aTHX_ enc, he, do_share_keys); + CALL_SRL_DUMP_SV(enc, v); + if (--n == 0) { + he_ptr= he_end; + break; + } + } + } + } while ( he_ptr < he_end ); +} + +SRL_STATIC_INLINE void +srl_dump_hv_unsorted_mg(pTHX_ srl_encoder_t *enc, HV *src, const UV n) +{ + HE *he; + UV i= 0; + const int do_share_keys = HvSHAREKEYS((SV *)src); + + (void)hv_iterinit(src); /* return value not reliable according to API docs */ + while ((he = hv_iternext(src))) { + SV *v; + if (expect_false( i == n )) + croak("Panic: cannot serialize a tied hash which changes its size!"); + v= hv_iterval(src, he); + srl_dump_hk(aTHX_ enc, he, do_share_keys); + CALL_SRL_DUMP_SV(enc, v); + ++i; + } + if (expect_false( i != n )) + croak("Panic: cannot serialize a tied hash which changes its size!"); +} + +/* sorting hashes - nothing in perl is easy. ever. + * + * Some things to keep in mind about perl hashes as you read this code: + * + * Hashes may be shared or not. Usually shared. This means they share their + * key data via PL_strtab. + * + * Hashes may be tied or not. Usually not. When tied the keys from the hash + * are available only as SV *'s, and when untied, the keys from the hash are + * accessed via HE *'s. + * + * Some HE's actually contains SV's but most contain a ptr/len combo with + * an utf8 flag. To make things even more interesting utf8 keys are + * normalized to latin1 by perl where possible before being stored in the HE, + * with the utf8 flag indicating "was utf8" instead of "is utf8" or "not utf8". + * + * The complexity about accessing the key for a hash can be managed away by + * perl via API's like hv_iterkeysv(), but using that means constructing mortal + * SV's for each key as we go. + * + * We could in theory use the HePV() interface, but one annoying result of the + * "was utf8" logic is that we can't use a sort comparator which looks + * at the raw binary of the keys when the keys might contain utf8. A utf8 key + * like "\xDF" will be downgraded to ascii in the HE form, but will be upgraded + * to the utf8 representation in the SV form. So if we want to do "fast" sorting + * we have to restrict it to non-utf8/non-sv keys, and force the use of the SV + * based API (which we have to use for tie's anyway) when we see a UTF8 key. + * + * Which is what we do below. In order to sort a hash we need to construct an + * array of its contents, in srl_dump_sorted_nomg() we walk the hash, checking + * each key, and copying each HE over into a scratch buffer which it then sorts. + * If during the transcription process it sees any utf8 or SV keys it exits + * immediately, and falls through to srl_dump_sort_mg(), which uses hv_iterkeysv() + * to construct an array of HE_SV instead, which we then sort. + */ - STRLEN la = HeKLEN (a); - STRLEN lb = HeKLEN (b); - if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la))) - cmp = lb - la; - return cmp; +SRL_STATIC_INLINE int +he_islt(const HE *a, const HE *b) +{ + /* no need for a dTHX here, we don't use anything that needs it */ + const STRLEN la = HeKLEN(a); + const STRLEN lb = HeKLEN(b); + const int cmp = memcmp(HeKEY(a), HeKEY(b), la < lb ? la : lb); + if (cmp) { + return cmp < 0; + } else { + return la < lb; + } } -/* compare hash entries, used when some keys are sv's or utf8 */ -static int -he_cmp_slow(const void *a, const void *b) +SRL_STATIC_INLINE int +he_sv_islt_fast(const HE_SV *a, const HE_SV *b) { - /* we are called as a callback from qsort, so no pTHX - * is possible in our argument signature, so we need to do a - * dTHX; here ourselves. */ - dTHX; - return sv_cmp( HeSVKEY_force( *(HE **)b), HeSVKEY_force( *(HE **)a ) ); + /* no need for a dTHX here, we don't use anything that needs it */ + char *a_ptr; + char *b_ptr; + int a_isutf8; + int b_isutf8; + const STRLEN a_len= a->key.sv ? SvCUR(a->key.sv) : HeKLEN(a->val.he); + const STRLEN b_len= b->key.sv ? SvCUR(b->key.sv) : HeKLEN(b->val.he); + if (a_len != b_len) { + return a_len < b_len; + } + a_isutf8= (a->key.sv ? SvUTF8(a->key.sv) : HeKUTF8(a->val.he)) ? 0 : 1; + b_isutf8= (b->key.sv ? SvUTF8(b->key.sv) : HeKUTF8(b->val.he)) ? 0 : 1; + if (a_isutf8 != b_isutf8) { + return a_isutf8 < b_isutf8; + } + a_ptr= a->key.sv ? SvPVX(a->key.sv) : HeKEY(a->val.he); + b_ptr= b->key.sv ? SvPVX(b->key.sv) : HeKEY(b->val.he); + return memcmp(a_ptr, b_ptr, a_len < b_len ? a_len : b_len ) < 0; } +#define ISLT_HE_SV(a,b) he_sv_islt_fast( a, b ) +#define ISLT_SV_CMP(a,b) sv_cmp(a->key.sv, b->key.sv) == sort_dir + SRL_STATIC_INLINE void -srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount) +srl_qsort(pTHX_ srl_encoder_t *enc, const UV n, HE_SV *array) +{ + if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) { + int sort_dir= SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV) ? 1 : -1; + /* hack to forcefully disable "use bytes" */ + COP cop= *PL_curcop; + cop.op_private= 0; + + ENTER; + SAVETMPS; + + SAVEVPTR (PL_curcop); + PL_curcop= &cop; + + /* now sort */ + QSORT(HE_SV, array, n, ISLT_SV_CMP); + + FREETMPS; + LEAVE; + } else { + /* now sort */ + QSORT(HE_SV, array, n, ISLT_HE_SV); + } +} + + +SRL_STATIC_INLINE void +srl_dump_hv_sorted_sv_slow(pTHX_ srl_encoder_t *enc, HV *src, const UV n, HE_SV *array) { HE *he; + UV i= 0; const int do_share_keys = HvSHAREKEYS((SV *)src); - UV n; + const int is_tie= !array; + + /* This sub is used for ties, and for hashes with SV keys in them, + * and when the user requests SORT_KEYS_PERL, it is the slowest way + * and most memory hungry way to serialize a hash. We will use the + * full perl api for extracting the contents of the hash, which fortifies + * us against ties, and we will convert all keys into mortal + * sv's where necessary. This means we can use sv_cmp on the keys + * if we wish. + */ - if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) { - UV i; - /* for tied hashes, we have to iterate to find the number of entries. Alas... */ - (void)hv_iterinit(src); /* return value not reliable according to API docs */ - n = 0; - while ((he = hv_iternext(src))) { ++n; } + (void)hv_iterinit(src); /* return value not reliable according to API docs */ + { + HE_SV *array_end; + if (!array) { + Newx(array, n, HE_SV); + SAVEFREEPV(array); + } + array_end= array + n; + while ((he = hv_iternext(src))) { + if (expect_false( i == n )) + croak("Panic: cannot serialize a %s hash which changes its size!",is_tie ? "tied" : "untied"); + array[i].key.sv= hv_iterkeysv(he); + array[i].val.sv= hv_iterval(src,he); + i++; + } + if (expect_false( i != n )) + croak("Panic: can not serialize a %s hash which changes it size!", is_tie ? "tied" : "untied"); - BUF_SIZE_ASSERT_HV(&enc->buf, n); + srl_qsort(aTHX_ enc, n, array); - if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) { - enc->buf.pos--; /* back up over the previous REFN */ - srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n); - } else { - srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n); + while ( array < array_end ) { + CALL_SRL_DUMP_SV(enc, array->key.sv); + CALL_SRL_DUMP_SV(enc, array->val.sv); + array++; } + } +} - (void)hv_iterinit(src); /* return value not reliable according to API docs */ - i = 0; - if (SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS)) { - HE **he_array; - int fast = 1; - Newxz(he_array, n, HE*); - SAVEFREEPV(he_array); - while ((he = hv_iternext(src))) { - if (expect_false( i == n )) - croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that."); - he_array[i++]= he; - if (HeKLEN (he) < 0 || HeKUTF8 (he)) - fast = 0; - } - if (expect_false( i != n )) - croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that."); - if (fast) { - qsort(he_array, n, sizeof (HE *), he_cmp_fast); - } else { - /* hack to forcefully disable "use bytes" */ - COP cop= *PL_curcop; - cop.op_private= 0; - - ENTER; - SAVETMPS; - SAVEVPTR (PL_curcop); - PL_curcop= &cop; +SRL_STATIC_INLINE void +srl_dump_hv_sorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, const UV n) +{ + HE *he; + const int do_share_keys = HvSHAREKEYS((SV *)src); - qsort(he_array, n, sizeof (HE *), he_cmp_slow); + /* This sub is used only for untied hashes and when the user wants + * sorted keys, but not necessarily the order that perl would use. + */ - FREETMPS; - LEAVE; - } - for ( i= 0; i < n ; i++ ) { - SV *v; - he= he_array[i]; - v= hv_iterval(src, he); - srl_dump_hk(aTHX_ enc, he, do_share_keys); - CALL_SRL_DUMP_SV(enc, v); - } - } else { - while ((he = hv_iternext(src))) { - SV *v; - if (expect_false( i == n )) - croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that."); - v= hv_iterval(src, he); - srl_dump_hk(aTHX_ enc, he, do_share_keys); - CALL_SRL_DUMP_SV(enc, v); - ++i; + (void)hv_iterinit(src); /* return value not reliable according to API docs */ + { + HE_SV *array; + HE_SV *array_ptr; + HE_SV *array_end; + Newx(array, n, HE_SV); + SAVEFREEPV(array); + array_ptr = array; + while ((he = hv_iternext(src))) { + if ( HeKWASUTF8(he) ) { + array_ptr->key.sv= hv_iterkeysv(he); + } else { + array_ptr->key.sv = HeSVKEY(he); } - if (expect_false( i != n )) - croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that."); + array_ptr->val.he = he; + array_ptr++; } - } else { + + srl_qsort(aTHX_ enc, n, array); + + array_end = array + n; + for ( array_end= array + n; array < array_end; array++ ) { + SV *v; + he = array->val.he; + v = hv_iterval(src, he); + srl_dump_hk(aTHX_ enc, he, do_share_keys); + CALL_SRL_DUMP_SV(enc, v); + } + } +} + +SRL_STATIC_INLINE void +srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount) +{ + HE *he; + UV n; + if ( SvMAGICAL(src) ) { + /* for tied hashes, we have to iterate to find the number of entries. Alas... */ + n= 0; + (void)hv_iterinit(src); /* return value not reliable according to API docs */ + while ((he = hv_iternext(src))) { ++n; } + } + else { n= HvUSEDKEYS(src); - BUF_SIZE_ASSERT_HV(&enc->buf, n); - if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) { - enc->buf.pos--; /* backup over the previous REFN */ - srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n); - } else { - srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n); + } + + BUF_SIZE_ASSERT_HV(&enc->buf, n); + if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) { + enc->buf.pos--; /* backup over the previous REFN */ + srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n); + } else { + srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n); + } + + if ( n ) { + if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) { + /* SORT_KEYS_PERL implies SORT_KEYS, but we check for either just to be + * careful - yves*/ + if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS|SRL_F_SORT_KEYS_PERL) ) { + srl_dump_hv_sorted_sv_slow(aTHX_ enc, src, n, NULL); + } + else { + srl_dump_hv_unsorted_mg(aTHX_ enc, src, n); + } } - if (n) { - HE **he_ptr= HvARRAY(src); - HE **he_end= he_ptr + HvMAX(src) + 1; - do { - for (he= *he_ptr++; he; he= HeNEXT(he) ) { - SV *v= HeVAL(he); - if (v != &PL_sv_placeholder) { - srl_dump_hk(aTHX_ enc, he, do_share_keys); - CALL_SRL_DUMP_SV(enc, v); - if (--n == 0) { - he_ptr= he_end; - break; - } - } - } - } while ( he_ptr < he_end ); + else { + if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) { + srl_dump_hv_sorted_nomg(aTHX_ enc, src, n); + } + else { + srl_dump_hv_unsorted_nomg(aTHX_ enc, src, n); + } } } } + SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys) { @@ -1527,7 +1695,7 @@ redo_dump: #if defined(MODERN_REGEXP) && defined(REGEXP_NO_LONGER_POK) /* Only need to enter here if we have rather modern regexps AND they're * NO LONGER POK (5.17.6 and up). */ - if (expect_false( svt == SVt_REGEXP ) ) { + if ( expect_false( svt == SVt_REGEXP ) ) { srl_dump_regexp(aTHX_ enc, src); } else @@ -1550,10 +1718,13 @@ redo_dump: ref_rewrite_pos= BODY_POS_OFS(&enc->buf); - if (expect_false( sv_isobject(src) )) { + if ( expect_false( sv_isobject(src) ) ) { /* Write bless operator with class name */ replacement= srl_get_frozen_object(aTHX_ enc, src, referent); - srl_dump_classname(aTHX_ enc, referent, replacement); /* 1 == have freeze call */ + if (srl_dump_classname(aTHX_ enc, referent, replacement)) { + /* 1 means we should not rewrite away the classname */ + ref_rewrite_pos= BODY_POS_OFS(&enc->buf); + } } srl_buf_cat_char(&enc->buf, SRL_HDR_REFN); @@ -1583,11 +1754,12 @@ redo_dump: srl_dump_av(aTHX_ enc, (AV *)src, refcount); } else - if (!SvOK(src)) { /* undef and weird shit */ - if ( svt > SVt_PVMG ) { /* we exclude magic, because magic sv's can be undef too */ + if ( ! SvOK(src) ) { /* undef and weird shit */ + if ( SRL_UNSUPPORTED_SvTYPE(svt) ) { + /* we exclude magic, because magic sv's can be undef too */ /* called when we find an unsupported type/reference. May either throw exception * or write ONE (nested or single) item to the buffer. */ -#define SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos) \ +#define SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos) \ STMT_START { \ if ( SRL_ENC_HAVE_OPTION((enc), SRL_F_UNDEF_UNKNOWN) ) { \ if (SRL_ENC_HAVE_OPTION((enc), SRL_F_WARN_UNKNOWN)) \ @@ -1638,7 +1810,7 @@ redo_dump: "by the Sereal encoding format", (svt), sv_reftype((src),0),(src)); \ } \ } STMT_END - SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos); + SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos); } else if (src == &PL_sv_undef && enc->protocol_version >= 3 ) { srl_buf_cat_char(&enc->buf, SRL_HDR_CANONICAL_UNDEF); @@ -1647,8 +1819,8 @@ redo_dump: } } else { - SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos); -#undef SRL_HANDLE_UNSUPPORTED_TYPE + SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos); +#undef SRL_HANDLE_UNSUPPORTED_SvTYPE } --enc->recursion_depth; } diff --git a/srl_encoder.h b/srl_encoder.h index f629410..639f220 100644 --- a/srl_encoder.h +++ b/srl_encoder.h @@ -27,7 +27,11 @@ typedef struct { UV recursion_depth; /* current Perl-ref recursion depth */ ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */ ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */ - ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */ + ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) + * for now this is also coopted to track which objects we have dumped as objects, + * and to ensure we only output a given object once. + * Possibly this should be replaced with freezeobj_svhash, but this works fine. + */ ptable_ptr freezeobj_svhash; /* ptr table for tracking objects and their frozen replacments via FREEZE */ HV *string_deduper_hv; /* track strings we have seen before, by content */ @@ -45,6 +49,16 @@ typedef struct { U32 hash; } sv_with_hash; +typedef struct { + union { + SV *sv; + } key; + union { + HE *he; + SV *sv; + } val; +} HE_SV; + /* constructor from options */ srl_encoder_t *srl_build_encoder_struct(pTHX_ HV *opt, sv_with_hash *options); @@ -84,10 +98,12 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use * set since we otherwise croak. Corresponds to the 'warn_unknown' option. */ #define SRL_F_WARN_UNKNOWN 0x00020UL -/* WARNING: SRL_F_COMPRESS_SNAPPY 0x00040UL - * SRL_F_COMPRESS_SNAPPY_INCREMENTAL 0x00080UL - * SRL_F_COMPRESS_ZLIB 0x00100UL - * are moved to srl_compress.h */ +/* WARNING: + * #define SRL_F_COMPRESS_SNAPPY 0x00040UL + * #define SRL_F_COMPRESS_SNAPPY_INCREMENTAL 0x00080UL + * #define SRL_F_COMPRESS_ZLIB 0x00100UL + * are moved to srl_compress.h + */ /* Only meaningful if SRL_F_WARN_UNKNOWN also set. If this one is set, then we don't warn * if the unsupported item has string overloading. */ @@ -116,13 +132,15 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use /* if set in flags, then do not use ARRAYREF or HASHREF ever */ #define SRL_F_CANONICAL_REFS 0x08000UL +#define SRL_F_SORT_KEYS_PERL 0x10000UL +#define SRL_F_SORT_KEYS_PERL_REV 0x20000UL /* ==================================================================== * oper flags */ /* Set while the encoder is in active use / dirty */ #define SRL_OF_ENCODER_DIRTY 1UL -#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & flag_num) +#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & (flag_num)) #define SRL_ENC_SET_OPTION(enc, flag_num) STMT_START {(enc)->flags |= (flag_num);}STMT_END #define SRL_ENC_RESET_OPTION(enc, flag_num) STMT_START {(enc)->flags &= ~(flag_num);}STMT_END @@ -133,6 +151,14 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use #define SRL_ENC_SV_COPY_ALWAYS 0x00000000UL #define SRL_ENC_SV_REUSE_MAYBE 0x00000001UL +#define SRL_UNSUPPORTED_SvTYPE(svt) ( \ + /* svt == SVt_INVLIST || */ \ + svt == SVt_PVGV || \ + svt == SVt_PVCV || \ + svt == SVt_PVFM || \ + svt == SVt_PVIO || \ + 0 ) + /* by default we do not allow people to build with support for SRL_HDR_LONG_DOUBLE */ #if defined(SRL_ALLOW_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE) #define SRL_DO_LONG_DOUBLE 1 diff --git a/srl_reader_error.h b/srl_reader_error.h index f83f5c3..f478fe7 100644 --- a/srl_reader_error.h +++ b/srl_reader_error.h @@ -22,11 +22,13 @@ #define SRL_RDR_ERROR_PANIC(buf, msg) SRL_RDR_ERRORf1((buf), "Panic: %s", msg); /* trace functions */ +#ifdef WANT_SRL_RDR_TRACE #ifdef TRACE_READER # define SRL_RDR_TRACE(msg, args...) \ fprintf(stderr, "%s:%d:%s(): "msg"\n", __FILE__, __LINE__, __func__, ## args) #else # define SRL_RDR_TRACE(msg, args...) #endif +#endif #endif diff --git a/t/005_flags.t b/t/005_flags.t index 7c78740..f89676e 100644 --- a/t/005_flags.t +++ b/t/005_flags.t @@ -1,7 +1,6 @@ use strict; use warnings; -use Sereal::Decoder; use Test::More; use File::Spec; use lib File::Spec->catdir(qw(t lib)); @@ -18,7 +17,7 @@ if (have_encoder_and_decoder(3.005003)) { run_tests( "aliased_dedupe_strings", { aliased_dedupe_strings => 1 } ); done_testing(); } else { - plan skip_all => 'Did not find right version of encoder'; + plan skip_all => "Did not find right version of encoder/decoder"; } sub run_tests { @@ -69,6 +68,7 @@ sub run_tests { TODO: { # we must do this test before we test numeric equivalence + no warnings 'numeric'; my $have= ($decoded ^ '1'); my $want= ($tests{$test} ^ '1'); local $TODO = $have ne $want ? "Cannot reliably round trip NIOK flag(s)" : undef; diff --git a/t/030_canonical_vs_test_deep.t b/t/030_canonical_vs_test_deep.t index adc3bb8..1b3705e 100644 --- a/t/030_canonical_vs_test_deep.t +++ b/t/030_canonical_vs_test_deep.t @@ -33,7 +33,7 @@ eval { unless version->new(Test::Deep->VERSION) >= version->new('0.110'); 1; } or do { - my $error = $@ // "Zombie Error"; + my $error = $@ || "Zombie Error"; plan skip_all => "We are skipping all our tests because we don't have a suitable Test::Deep here, got error: $error"; }; plan tests => keys(%tests) * 2; diff --git a/t/040_tied_hash.t b/t/040_tied_hash.t new file mode 100644 index 0000000..6549ed8 --- /dev/null +++ b/t/040_tied_hash.t @@ -0,0 +1,71 @@ +use warnings; +use strict; + +package NewStdHash; +require Tie::Hash; +our @ISA = qw(Tie::StdHash); + +package main; +use Test::More; +use File::Spec; +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} +use Sereal::TestSet qw(:all); +my $u_df= "\xDFu"; +utf8::upgrade($u_df); +my @keys= ( + 'foo', 'bar', 'mip', 'xap', 'food', 'fool', 'fools', 'barking', 'bark', + $u_df, + "\x{df}a", + "\x{c3}", + "\x{de}", + "\x{e0}", + "\x{100}", + "\x{123}", + "\x{c4}\x{80}", +); +my $have_decoder= have_encoder_and_decoder(); +if ($have_decoder) { + plan tests => 1 + (4 * @keys); +} else { + plan tests => 1; +} + +my $enc = Sereal::Encoder->new({ + sort_keys => 1, +}); + +tie my %new_std_hash, 'NewStdHash'; +my %normal_hash; +foreach my $i (0..$#keys) { + $new_std_hash{$keys[$i]} = $i; + $normal_hash{$keys[$i]}= $i; +} + +my $enc_tied = $enc->encode(\%new_std_hash); +my $enc_normal= $enc->encode(\%normal_hash); + + +is($enc_tied, $enc_normal, "Tied and untied are the same") +or do { + diag "Normal:\n"; + hobodecode $enc_normal; + diag "Tied: \n"; + hobodecode $enc_tied; +}; + +if ($have_decoder) { + my $dec= Sereal::Decoder->new(); + my $dec_tied= $dec->decode($enc_tied); + my $dec_normal= $dec->decode($enc_normal); + foreach my $i (0..$#keys) { + is($dec_tied->{$keys[$i]},$i, "decoded tied"); + is($dec_normal->{$keys[$i]},$i, "decoded normal"); + is($new_std_hash{$keys[$i]},$i, "original tied"); + is($normal_hash{$keys[$i]},$i, "original normal"); + } +} + diff --git a/t/400_evil.t b/t/400_evil.t index 68415a3..5fc2f76 100644 --- a/t/400_evil.t +++ b/t/400_evil.t @@ -1,7 +1,6 @@ #!perl use strict; use warnings; -use Sereal::Encoder qw(:all); use Data::Dumper; use File::Spec; use Scalar::Util qw(blessed); @@ -23,6 +22,7 @@ if (not have_encoder_and_decoder()) { exit 0; } +Sereal::Encoder->import(":all"); Sereal::Decoder->import(":all"); # First, test tied hashes. Expected behaviour: We don't segfault, we don't diff --git a/t/700_roundtrip/v1/plain_canon.t b/t/700_roundtrip/v1/plain_canon.t index f16265e..981eb38 100644 --- a/t/700_roundtrip/v1/plain_canon.t +++ b/t/700_roundtrip/v1/plain_canon.t @@ -19,7 +19,7 @@ if (not $ok) { plan skip_all => 'Did not find right version of encoder (want 3.001006)'; } else { - run_roundtrip_tests("plain_canon", { canonical => 1 }); + run_roundtrip_tests("plain_canonical", { canonical => 1 }); } pass(); diff --git a/t/700_roundtrip/v2/plain_canon.t b/t/700_roundtrip/v2/plain_canon.t index f16265e..981eb38 100644 --- a/t/700_roundtrip/v2/plain_canon.t +++ b/t/700_roundtrip/v2/plain_canon.t @@ -19,7 +19,7 @@ if (not $ok) { plan skip_all => 'Did not find right version of encoder (want 3.001006)'; } else { - run_roundtrip_tests("plain_canon", { canonical => 1 }); + run_roundtrip_tests("plain_canonical", { canonical => 1 }); } pass(); diff --git a/t/700_roundtrip/v3/plain_canon.t b/t/700_roundtrip/v3/plain_canon.t index f16265e..981eb38 100644 --- a/t/700_roundtrip/v3/plain_canon.t +++ b/t/700_roundtrip/v3/plain_canon.t @@ -19,7 +19,7 @@ if (not $ok) { plan skip_all => 'Did not find right version of encoder (want 3.001006)'; } else { - run_roundtrip_tests("plain_canon", { canonical => 1 }); + run_roundtrip_tests("plain_canonical", { canonical => 1 }); } pass(); diff --git a/t/700_roundtrip/v3/sort_keys_perl.t b/t/700_roundtrip/v3/sort_keys_perl.t new file mode 100644 index 0000000..65f327a --- /dev/null +++ b/t/700_roundtrip/v3/sort_keys_perl.t @@ -0,0 +1,29 @@ +#!perl +use strict; +use warnings; +use Data::Dumper; +use File::Spec; + +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); +use Test::More; + +my $ok = have_encoder_and_decoder(); +if (not $ok) { + plan skip_all => 'Did not find right version of encoder'; +} +else { + run_roundtrip_tests( + 'sort_keys', { sort_keys => 2 } + ); +} + + +pass(); +done_testing(); + diff --git a/t/700_roundtrip/v3/sort_keys_perl_rev.t b/t/700_roundtrip/v3/sort_keys_perl_rev.t new file mode 100644 index 0000000..c874eed --- /dev/null +++ b/t/700_roundtrip/v3/sort_keys_perl_rev.t @@ -0,0 +1,29 @@ +#!perl +use strict; +use warnings; +use Data::Dumper; +use File::Spec; + +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); +use Test::More; + +my $ok = have_encoder_and_decoder(); +if (not $ok) { + plan skip_all => 'Did not find right version of encoder'; +} +else { + run_roundtrip_tests( + 'sort_keys', { sort_keys => 3 } + ); +} + + +pass(); +done_testing(); + diff --git a/t/800_threads.t b/t/800_threads.t index 089a304..42e8088 100644 --- a/t/800_threads.t +++ b/t/800_threads.t @@ -14,17 +14,27 @@ BEGIN { exit(0); } } +use File::Spec; +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} +use Sereal::TestSet qw(:all); use Sereal::Encoder; plan tests => 1; use threads; +use threads::shared; sub foo {} SCOPE: { - my $enc = Sereal::Encoder->new; + my $dat= shared_clone([undef]); + my $enc = Sereal::Encoder->new; - my $thr = threads->new(\&foo); - $thr->join; + my $thr = threads->new(\&foo); + $thr->join; + my $encoded= $enc->encode($dat); } pass(); diff --git a/t/lib/Sereal/TestSet.pm b/t/lib/Sereal/TestSet.pm index 01fc12c..9a650bb 100644 --- a/t/lib/Sereal/TestSet.pm +++ b/t/lib/Sereal/TestSet.pm @@ -15,11 +15,38 @@ use Scalar::Util qw(reftype blessed refaddr); use Config; use Carp qw(confess); use Storable qw(dclone); +use Cwd; # Dynamically load constants from whatever is being tested -our ($Class, $ConstClass); +our ($Class, $ConstClass, $InRepo); +sub get_git_top_dir { + my @dirs = (0, 1, 2, 4); + for my $d (@dirs) { + my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d); + my $gdir = File::Spec->catdir($tdir, '.git'); + return $tdir + if -d $gdir; + } + return(); +} + +BEGIN{ + if (defined(my $top_dir = get_git_top_dir())) { + for my $need ('Encoder', 'Decoder') { + my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib"); + if (-d $blib_dir) { + require blib; + blib->import($blib_dir); + } + } + $InRepo=1; + } +} BEGIN { - if (-e "lib/Sereal/Encoder.pm") { + if (-e "lib/Sereal.pm") { + $Class = 'Sereal::Encoder'; + } + elsif (-e "lib/Sereal/Encoder.pm") { $Class = 'Sereal::Encoder'; } elsif (-e "lib/Sereal/Decoder.pm") { @@ -31,7 +58,7 @@ BEGIN { elsif (-e "lib/Sereal/Splitter.pm") { $Class = 'Sereal::Splitter'; } else { - die "Could not find an applicable Sereal constants location"; + die "Could not find an applicable Sereal constants location (in: ",cwd(),")"; } $ConstClass = $Class . "::Constants"; eval "use $ConstClass ':all'; 1" @@ -601,16 +628,6 @@ sub setup_tests { } -sub get_git_top_dir { - my @dirs = (0, 1, 2); - for my $d (@dirs) { - my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d); - my $gdir = File::Spec->catdir($tdir, '.git'); - return $tdir - if -d $gdir; - } - return(); -} sub have_encoder_and_decoder { my ($min_v)= @_; @@ -618,13 +635,6 @@ sub have_encoder_and_decoder { my $need = $Class =~ /Encoder/ ? "Decoder" : "Encoder"; my $need_class = "Sereal::$need"; - if (defined(my $top_dir = get_git_top_dir())) { - my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib"); - if (-d $blib_dir) { - require blib; - blib->import($blib_dir); - } - } eval "use $Class; 1" or do { note("Could not locate $Class for testing" . ($@ ? " (Exception: $@)" : "")); @@ -638,7 +648,7 @@ sub have_encoder_and_decoder { }; my $cmp_v = $need_class->VERSION; if ($min_v and $cmp_v < $min_v) { - note("Could not load correct version of $need_class for testing " + diag("Could not load correct version of $need_class for testing " ."(got: $cmp_v, needed at least $min_v)"); return; } @@ -646,7 +656,7 @@ sub have_encoder_and_decoder { $cmp_v = sprintf("%.2f", int($cmp_v*100)/100); my %compat_versions = map {$_ => 1} $Class->_test_compat(); if (not defined $cmp_v or not exists $compat_versions{$cmp_v}) { - note("Could not load correct version of $need_class for testing " + diag("Could not load correct version of $need_class for testing " ."(got: $cmp_v, needed any of ".join(", ", keys %compat_versions).")"); return(); } @@ -681,6 +691,10 @@ my $eng2= "1e3"; my $sum= $eng0e0 + $eng0e1 + $eng2; +sub encoder_required { + my ($ver, $name)= @_; + return "" . ( $Sereal::Encoder::VERSION < $ver ? "TODO " : "") . $name; +} sub _get_roundtrip_tests { my @ScalarRoundtripTests = ( @@ -734,8 +748,8 @@ sub _get_roundtrip_tests { 1023,1024,1025, 8191,8192,8193, )), - ( map { [ ( $Sereal::Encoder::VERSION < 3.005002 ? "TODO" : "") - . " troublesome num/strs '$_'", $_ ] } @numstr ), + ( map { [ encoder_required(3.005002, " troublesome num/strs '$_'"), + $_ ] } @numstr ), ["long latin1 string", "üll" x 10000], ["long utf8 string", do {use utf8; " עדיין חשב" x 10000}], ["long utf8 string with only ascii", do {use utf8; "foo" x 10000}], @@ -810,8 +824,27 @@ sub _get_roundtrip_tests { [ "undef", [\undef, \undef] ], ); + my @blessed_array_check1; + $blessed_array_check1[0]= "foo"; + $blessed_array_check1[1]= bless \$blessed_array_check1[0], "BlessedArrayCheck"; + $blessed_array_check1[2]= \$blessed_array_check1[0]; + + my @blessed_array_check2= (3,0,0,3); + $blessed_array_check2[1]= \$blessed_array_check2[0]; + $blessed_array_check2[2]= \$blessed_array_check2[3]; + bless \$blessed_array_check2[0], "BlessedArrayCheck"; + bless \$blessed_array_check2[3], "BlessedArrayCheck"; + + my @sc_array=(1,1); + $sc_array[0]=bless \$sc_array[1], "BlessedArrayLeft"; + $sc_array[1]=bless \$sc_array[0], "BlessedArrayRight"; + + my @RoundtripTests = ( @ScalarRoundtripTests, + [ encoder_required(3.006006,"BlessedArrayCheck 1"), \@blessed_array_check1 ], + [ encoder_required(3.006006,"BlessedArrayCheck 2"), \@blessed_array_check2 ], + [ encoder_required(3.006006,"Scalar Cross Blessed Array"), \@sc_array ], ["[{foo => 1}, {foo => 2}] - repeated hash keys", [{foo => 1}, {foo => 2}] ], @@ -1099,7 +1132,8 @@ sub run_roundtrip_tests_internal { eval {$decoded2 = $dec->($encoded2); 1} or do { my $err = $@ || 'Zombie error'; - diag("Got error while encoding the second time: $err"); + diag("Got error while decoding the second time: $err"); + # hobodecode($encoded2); }; defined($decoded2) == defined($data) @@ -1127,7 +1161,7 @@ sub run_roundtrip_tests_internal { eval {$decoded3 = $dec->($encoded3); 1} or do { my $err = $@ || 'Zombie error'; - diag("Got error while encoding the third time: $err"); + diag("Got error while decoding the third time: $err"); }; defined($decoded3) == defined($data)