* of the decoder before upgrading to version 3 of the *
* encoder! *
****************************************************************
+3.008 Nov 27 2015
+ * FIX: Make sure that reserializing a data structure created
+ using alias_varint_under does not produce a corrupted dataset.
+ Thanks to Iskra for the report.
+ * FIX: Precedence issue related to sort keys options. Thanks
+ to Petr Písař <ppisar@redhat.com> for the report and fix.
+ * FIX: Win32 build issues, thanks to bulk88 for help.
+
+3.007 Nov 26 2015
+ * FIX: sorting should now work with tied hashes, and
+ be in general faster.
+ * CHANGED: Sort order for sort_keys=1 is now defined to be
+ "in order by length of bytes, then by byte order of the
+ underlying string, then by utd8ness, with non-utf8 first".
+ This sort order was chosen because it requires the least
+ operations to perform in the most cases, has a bounded number
+ of cases where we would have to create any temporary SV's or
+ to do operations that require us to introspect codepoints in
+ utf8 strings. IOW purely performance. Note that so long
+ as you compare data created with the same sort_keys setting
+ you will ALWAYS get the same order *in process*, regardless
+ of what you choose. Backwards compatible support for the
+ old order is available via sort_keys=3.
+
+ ADDED: sort_keys=2 to provide "perl cmp order"
+ (Its just there because it is easy to do, not for any
+ good reason.)
+ ADDED: sort_keys=3 to provide "rev perl cmp order"
+
+ Legacy sort order was equivalent to sort_keys=3, use this
+ option *ONLY* if you are extremely sensitive to changes in the
+ sorted order (or "canonical form").
+
+ * [DECODER] Build fixes for boxes without any Sereal installed (it seems
+ to be common on certain types of smokers, and not on others).
+ * Fix issues with serializing blessed scalars where we see the
+ blessed scalar before we see the reference to it.
+ * Handle PVLV undefs
+ * Build fixes for 5.8.9
+ * Doc patches
+ * Optimizations
+
3.006 Nov 14 2015
* [DECODER] fix segfaults from heavily corrupted data. Guards
against various pathological cases which could cause segfaults
#include "perl.h"
#include "XSUB.h"
-#define NEED_newSV_type
+#define NEED_newSV_type_GLOBAL
#include "ppport.h"
#include "srl_encoder.h"
CODE:
srl_destroy_encoder(aTHX_ enc);
+U32
+flags(enc)
+ srl_encoder_t *enc;
+ CODE:
+ RETVAL = enc->flags;
+ OUTPUT: RETVAL
void
encode_sereal(src, opt = NULL)
}
PTABLE_iter_free(iter);
PTABLE_free(tbl);
+
+
author_tools/hobodecoder.pl
author_tools/numeric_str_length.c
author_tools/stringify_test.c
+author_tools/update_flag_consts.pl
author_tools/update_from_header.pl
author_tools/valgrind.supp
Changes
miniz.h
ppport.h
ptable.h
+qsort.h
snappy/csnappy.h
snappy/csnappy_compat.h
snappy/csnappy_compress.c
t/021_sort_keys_option.t
t/022_canonical_refs.t
t/030_canonical_vs_test_deep.t
+t/040_tied_hash.t
t/110_nobless.t
t/120_hdr_data.t
t/130_freezethaw.t
t/700_roundtrip/v3/snappy_incr.t
t/700_roundtrip/v3/snappy_incr_canon.t
t/700_roundtrip/v3/sort_keys.t
+t/700_roundtrip/v3/sort_keys_perl.t
+t/700_roundtrip/v3/sort_keys_perl_rev.t
t/700_roundtrip/v3/zlib.t
t/700_roundtrip/v3/zlib_force.t
t/800_threads.t
"Steffen Mueller <smueller@cpan.org>, Yves Orton <yves@cpan.org>"
],
"dynamic_config" : 1,
- "generated_by" : "ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001",
+ "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380",
"license" : [
"perl_5"
],
"inc"
]
},
- "prereqs" : {
- "build" : {
- "requires" : {
- "ExtUtils::ParseXS" : "2.21",
- "File::Find" : "0",
- "File::Path" : "0"
- }
- },
- "configure" : {
- "requires" : {
- "ExtUtils::MakeMaker" : "7.0"
- }
- },
- "runtime" : {
- "requires" : {
- "XSLoader" : "0",
- "perl" : "5.008"
- }
- },
- "test" : {
- "requires" : {
- "Data::Dumper" : "0",
- "File::Spec" : "0",
- "Scalar::Util" : "0",
- "Sereal::Decoder" : "3.00",
- "Test::LongString" : "0",
- "Test::More" : "0.88",
- "Test::Warn" : "0"
- }
- }
- },
"release_status" : "stable",
"resources" : {
"bugtracker" : {
"url" : "git://github.com/Sereal/Sereal.git"
}
},
- "version" : "3.006"
+ "version" : "3.008"
}
abstract: 'Fast, compact, powerful binary serialization'
author:
- 'Steffen Mueller <smueller@cpan.org>, Yves Orton <yves@cpan.org>'
-build_requires:
- Data::Dumper: '0'
- ExtUtils::ParseXS: '2.21'
- File::Find: '0'
- File::Path: '0'
- File::Spec: '0'
- Scalar::Util: '0'
- Sereal::Decoder: '3.00'
- Test::LongString: '0'
- Test::More: '0.88'
- Test::Warn: '0'
-configure_requires:
- ExtUtils::MakeMaker: '7.0'
+build_requires: {}
dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001'
+generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380'
license: perl
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
- version: '1.4'
+ version: 1.4
name: Sereal-Encoder
no_index:
directory:
- t
- inc
-requires:
- XSLoader: '0'
- perl: '5.008'
resources:
bugtracker: https://github.com/Sereal/Sereal/issues
repository: git://github.com/Sereal/Sereal.git
-version: '3.006'
+version: 3.008
use Data::Dumper;
use Getopt::Long qw(GetOptions);
+use Encode qw(encode_utf8 decode_utf8);
our @constants;
no warnings 'recursion';
BEGIN {
my $len = $o;
my $str = _chop_data_prefix( $len );
$done .= $str;
- printf "SHORT_BINARY(%u): '%s'\n", $len, $str;
+ printf "SHORT_BINARY(%u): '%s' (%s)\n", $len, encode_utf8($str), unpack("H*", $str);
}
elsif ($o == SRL_HDR_BINARY || $o == SRL_HDR_STR_UTF8) {
my $l = varint();
my $str = _chop_data_prefix( $l ); # fixme UTF8
$done .= $str;
- printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $l, $str);
+ $str= decode_utf8($str) if $o == SRL_HDR_STR_UTF8;
+ printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s' (%s)\n", $l, encode_utf8($str), unpack("H*", encode_utf8($str)));
}
elsif ($o == SRL_HDR_FLOAT) {
printf "FLOAT(%f)\n", parse_float();
--- /dev/null
+use strict;
+use warnings;
+use Data::Dumper;
+
+my %sets;
+my %flag_consts;
+my %flag_names;
+my (@flags, @static, @volatile);
+my $file= "srl_encoder.h";
+{
+ open my $fh, "<", $file
+ or die "Failed to open '$file' for read: $!";
+
+ while (<$fh>) {
+ if ( m/#define (SRL_F_(\w+))\s+(.*)/ || /(\w+(VOLATILE_FLAGS))\s+(.*)/ ) {
+ #print;
+ my $full_name= $1;
+ my $name= $2;
+ my $value= $3;
+ $name=~s/_?ENCODER_?//g;
+ $flag_names{$full_name}= $name;
+ if ($value=~s/UL\z//) {
+ $flag_consts{$full_name}= 0+eval $value;
+ } else {
+ $value =~ s/(SRL_F_\w+)/\$flag_consts{$1}/g;
+ $sets{"SRL_F_ENCODER_" . $name}= 0+eval $value;
+ }
+ }
+ }
+ foreach my $key ( sort { $flag_consts{$a} <=> $flag_consts{$b} } keys %flag_consts ) {
+ if (defined $sets{SRL_F_ENCODER_VOLATILE_FLAGS}) {
+ my $is_volatile= $flag_consts{$key} & $sets{SRL_F_ENCODER_VOLATILE_FLAGS};
+ push @static, $is_volatile ? undef : $flag_names{$key};
+ push @volatile, $is_volatile ? $flag_names{$key} : undef;
+ }
+ push @flags, $flag_names{$key};
+ }
+}
+
+my %consts= (%sets, %flag_consts);
+$consts{_FLAG_NAME}= \@flags;
+if (@volatile) {
+ $consts{_FLAG_NAME_VOLATILE}= \@volatile;
+ $consts{_FLAG_NAME_STATIC}= \@static;
+}
+
+my $infile= "lib/Sereal/Encoder.pm";
+my $outfile= "$infile.new";
+open my $fh,"<", $infile
+ or die "Failed to read '$infile': $!";
+open my $ofh, ">", $outfile
+ or die "Failed to write to '$outfile': $!";
+while (<$fh>) {
+ if (/#begin generated/) {
+ print $ofh $_;
+ my $s= Data::Dumper->new([\%consts])->Sortkeys(1)->Terse(1)->Dump();
+ chop($s);
+ $s.="; #end generated\n";
+ print $ofh $s;
+ }
+ unless(/#begin generated/ ... /#end generated/) {
+ print $ofh $_;
+ }
+}
+close $ofh;
+close $fh;
+rename $infile, "$infile.bak" or die "Failed to rename '$infile' to '$infile.bak': $!";
+rename $outfile, $infile or die "Failed to rename '$outfile' to '$infile': $!";
+
use warnings;
use Config;
+use constant OSNAME => $^O;
sub link_files {
my $shared_dir = shift;
- my $exlude_tests = shift;
+ my $do_tests = shift || "";
+ my $exclude_tests= $do_tests eq "without_tests";
+ my $tests_only= $do_tests eq "tests_only";
+
# This fires from a git source tree only.
# Right now, all devs are on Linux. Feel free to make portable.
eval {
my $f = $_;
s/^\Q$shared_dir\E\/?// or die $_;
return unless $_;
- return if $exlude_tests && m#^/?t/#;
+ return if $exclude_tests && m#^/?t/#;
+ return if $tests_only && !m#^/?t/#;
if (-d $f) {
File::Path::mkpath($_)
my $ref = join "/", ("..") x scalar(@d);
my $subd = join "/", @d;
chdir $subd if length($ref);
- symlink(join("/", grep length, $ref, $shared_dir, $subd, $fname), $fname);
+ my $srcfname = join("/", grep length, $ref, $shared_dir, $subd, $fname);
+ if (OSNAME eq 'MSWin32') {
+ die "link($srcfname, $fname) failed: $!"
+ unless link($srcfname, $fname); #only NTFS implements it
+ }
+ else {
+ symlink($srcfname, $fname);
+ }
chdir($ref) if length($ref);
}
},
use Carp qw/croak/;
use XSLoader;
-our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed decoders!
+our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed decoders!
our $XS_VERSION = $VERSION; $VERSION= eval $VERSION;
# not for public consumption, just for testing.
# Make sure to keep these constants in sync with the C code in srl_encoder.c.
# I know they could be exported from C using things like ExtUtils::Constant,
# but that's too much of a hassle for just three numbers.
-use constant SRL_UNCOMPRESSED => 0;
-use constant SRL_SNAPPY => 1;
-use constant SRL_ZLIB => 2;
+use constant {
+ SRL_UNCOMPRESSED => 0,
+ SRL_SNAPPY => 1,
+ SRL_ZLIB => 2,
+};
+use constant #begin generated
+{
+ 'SRL_F_ALIASED_DEDUPE_STRINGS' => 4096,
+ 'SRL_F_CANONICAL_REFS' => 32768,
+ 'SRL_F_COMPRESS_SNAPPY' => 64,
+ 'SRL_F_COMPRESS_SNAPPY_INCREMENTAL' => 128,
+ 'SRL_F_COMPRESS_ZLIB' => 256,
+ 'SRL_F_CROAK_ON_BLESS' => 4,
+ 'SRL_F_DEDUPE_STRINGS' => 2048,
+ 'SRL_F_ENABLE_FREEZE_SUPPORT' => 16384,
+ 'SRL_F_NOWARN_UNKNOWN_OVERLOAD' => 512,
+ 'SRL_F_NO_BLESS_OBJECTS' => 8192,
+ 'SRL_F_REUSE_ENCODER' => 2,
+ 'SRL_F_SHARED_HASHKEYS' => 1,
+ 'SRL_F_SORT_KEYS' => 1024,
+ 'SRL_F_SORT_KEYS_PERL' => 65536,
+ 'SRL_F_SORT_KEYS_PERL_REV' => 131072,
+ 'SRL_F_STRINGIFY_UNKNOWN' => 16,
+ 'SRL_F_UNDEF_UNKNOWN' => 8,
+ 'SRL_F_WARN_UNKNOWN' => 32,
+ '_FLAG_NAME' => [
+ 'SHARED_HASHKEYS',
+ 'REUSE',
+ 'CROAK_ON_BLESS',
+ 'UNDEF_UNKNOWN',
+ 'STRINGIFY_UNKNOWN',
+ 'WARN_UNKNOWN',
+ 'COMPRESS_SNAPPY',
+ 'COMPRESS_SNAPPY_INCREMENTAL',
+ 'COMPRESS_ZLIB',
+ 'NOWARN_UNKNOWN_OVERLOAD',
+ 'SORT_KEYS',
+ 'DEDUPE_STRINGS',
+ 'ALIASED_DEDUPE_STRINGS',
+ 'NO_BLESS_OBJECTS',
+ 'ENABLE_FREEZE_SUPPORT',
+ 'CANONICAL_REFS',
+ 'SORT_KEYS_PERL',
+ 'SORT_KEYS_PERL_REV'
+ ]
+}; #end generated
use Exporter 'import';
our @EXPORT_OK = qw(
XSLoader::load('Sereal::Encoder', $XS_VERSION);
+sub encode_to_file {
+ my ($self, $file, $struct, $append)= @_;
+ my $mode= $append ? ">>" : ">";
+ open my $fh, $mode, $file
+ or die "Failed to open '$file' for " . ($append ? "append" : "write") . ": $!";
+ print $fh $self->encode($struct)
+ or die "Failed to print to '$file': $!";
+ close $fh
+ or die "Failed to close '$file': $!";
+}
+
+my $flags= sub {
+ my ($int, $ary)= @_;
+ return map {
+ ($ary->[$_] and $int & (1 << $_)) ? $ary->[$_] : ()
+ } (0..$#$ary);
+};
+
+sub flag_names {
+ my ($self, $val)= @_;
+ return $flags->($val // $self->flags, _FLAG_NAME);
+}
+
1;
__END__
keys), and so two hashes that might appear to be the same might still produce
different output as far as Sereal is concerned.
-The thusly allocated encoder object and its output buffer will be reused
-between invocations of C<encode()>, so hold on to it for an efficiency
-gain if you plan to serialize multiple similar data structures, but destroy
-it if you serialize a single very large data structure just once to free
-the memory.
+As of 3.006_007 (prerelease candidate for 3.007) the sort order has been changed
+to the following: order by length of keys (in bytes) ascending, then by byte
+order of the raw underlying string, then by utf8ness, with non-utf8 first. This
+order was chosen because it is the most efficient to implement, both in terms
+of memory and time. This sort order is enabled when sort_keys is set to 1.
+
+You may also produce output in Perl "cmp" order, by setting sort_keys to 2.
+And for backwards compatibility you may also produce output in reverse Perl
+"cmp" order by setting sort_keys to 3. Prior to 3.006_007 this was the
+only sort order possible, although it was not explicitly defined what it was.
+
+Note that comparatively speaking both of the "cmp" sort orders are slow and
+memory inefficient. Unless you have a really good reason stick to the default
+which is fast and as lean as possible.
+
+Unless you are concerned with "cross process canonical representation" then
+it doesn't matter what option you choose.
See L</CANONICAL REPRESENTATION> for why you might want to use this, and
for the various caveats involved.
It is possible that a future release of the protocol will fix these issues.
-=back 4
+=back
=head1 BUGS, CONTACT AND SUPPORT
require Exporter;
our @ISA= qw(Exporter);
-our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed encoders!
+our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed encoders!
our (@EXPORT_OK, %DEFINE, %TAG_INFO_HASH, @TAG_INFO_ARRAY);
--- /dev/null
+/* $Id: qsort.h,v 1.5 2008-01-28 18:16:49 mjt Exp $
+ * Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* in-line qsort implementation. Differs from traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing an address
+ * of a comparison routine to the function, it is possible to inline
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ * #include "iqsort.h"
+ * #define islt(a,b) (strcmp((*a),(*b))<0)
+ * char *arr[];
+ * int n;
+ * QSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ * QSORT(TYPE,BASE,NELT,ISLT)
+ * 1) type of each element, TYPE,
+ * 2) address of the beginning of the array, of type TYPE*,
+ * 3) number of elements in the array, and
+ * 4) comparision routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ * qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whenever one element is less than
+ * another, no other conditions (greather than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ * #define islt(a,b) ((*a)<(*b))
+ * QSORT(int, arr, n, islt)
+ *
+ * The macro could be used to implement a sorting function (see examples
+ * below), or to implement the sorting algorithm inline. That is, either
+ * create a sorting function and use it whenever you want to sort something,
+ * or use QSORT() macro directly instead a call to such routine. Note that
+ * the macro expands to quite some code (compiled size of int qsort on x86
+ * is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ * QSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ * QSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting array of structures:
+ *
+ * struct elt {
+ * int key;
+ * ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ * QSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap two items pointed to by A and B using temporary buffer t. */
+#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+ This particular magic number was chosen to work best on a Sun 4/260. */
+#define _QSORT_MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations
+ * (inlined in QSORT).
+typedef struct {
+ QSORT_TYPE *_lo, *_hi;
+} qsort_stack_node;
+ */
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+ log(MAX_THRESH)). Since total_elements has type unsigned, we get as
+ upper bound for log (total_elements):
+ bits per byte (CHAR_BIT) * sizeof(unsigned). */
+#define _QSORT_STACK_SIZE (8 * sizeof(unsigned))
+#define _QSORT_PUSH(top, low, high) \
+ (((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define _QSORT_POP(low, high, top) \
+ ((--top, (low = top->_lo), (high = top->_hi)))
+#define _QSORT_STACK_NOT_EMPTY (_stack < _top)
+
+
+/* Order size using quicksort. This implementation incorporates
+ four optimizations discussed in Sedgewick:
+
+ 1. Non-recursive, using an explicit stack of pointer that store the
+ next array partition to sort. To save time, this maximum amount
+ of space required to store an array of SIZE_MAX is allocated on the
+ stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
+ only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+ Pretty cheap, actually.
+
+ 2. Chose the pivot element using a median-of-three decision tree.
+ This reduces the probability of selecting a bad pivot value and
+ eliminates certain extraneous comparisons.
+
+ 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+ insertion sort to order the MAX_THRESH items within each partition.
+ This is a big win, since insertion sort is faster for small, mostly
+ sorted array segments.
+
+ 4. The larger of the two sub-partitions is always pushed onto the
+ stack first, with the algorithm then concentrating on the
+ smaller partition. This *guarantees* no more than log (total_elems)
+ stack size is needed (actually O(1) in this case)! */
+
+/* The main code starts here... */
+#define QSORT(QSORT_TYPE,QSORT_BASE,QSORT_NELT,QSORT_LT) \
+{ \
+ QSORT_TYPE *const _base = (QSORT_BASE); \
+ const unsigned _elems = (QSORT_NELT); \
+ QSORT_TYPE _hold; \
+ \
+ /* Don't declare two variables of type QSORT_TYPE in a single \
+ * statement: eg `TYPE a, b;', in case if TYPE is a pointer, \
+ * expands to `type* a, b;' wich isn't what we want. \
+ */ \
+ \
+ if (_elems > _QSORT_MAX_THRESH) { \
+ QSORT_TYPE *_lo = _base; \
+ QSORT_TYPE *_hi = _lo + _elems - 1; \
+ struct { \
+ QSORT_TYPE *_hi; QSORT_TYPE *_lo; \
+ } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1; \
+ \
+ while (_QSORT_STACK_NOT_EMPTY) { \
+ QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr; \
+ \
+ /* Select median value from among LO, MID, and HI. Rearrange \
+ LO and HI so the three values are sorted. This lowers the \
+ probability of picking a pathological pivot value and \
+ skips a comparison for both the LEFT_PTR and RIGHT_PTR in \
+ the while loops. */ \
+ \
+ QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); \
+ \
+ if (QSORT_LT (_mid, _lo)) \
+ _QSORT_SWAP (_mid, _lo, _hold); \
+ if (QSORT_LT (_hi, _mid)) { \
+ _QSORT_SWAP (_mid, _hi, _hold); \
+ if (QSORT_LT (_mid, _lo)) \
+ _QSORT_SWAP (_mid, _lo, _hold); \
+ } \
+ \
+ _left_ptr = _lo + 1; \
+ _right_ptr = _hi - 1; \
+ \
+ /* Here's the famous ``collapse the walls'' section of quicksort. \
+ Gotta like those tight inner loops! They are the main reason \
+ that this algorithm runs much faster than others. */ \
+ do { \
+ while (QSORT_LT (_left_ptr, _mid)) \
+ ++_left_ptr; \
+ \
+ while (QSORT_LT (_mid, _right_ptr)) \
+ --_right_ptr; \
+ \
+ if (_left_ptr < _right_ptr) { \
+ _QSORT_SWAP (_left_ptr, _right_ptr, _hold); \
+ if (_mid == _left_ptr) \
+ _mid = _right_ptr; \
+ else if (_mid == _right_ptr) \
+ _mid = _left_ptr; \
+ ++_left_ptr; \
+ --_right_ptr; \
+ } \
+ else if (_left_ptr == _right_ptr) { \
+ ++_left_ptr; \
+ --_right_ptr; \
+ break; \
+ } \
+ } while (_left_ptr <= _right_ptr); \
+ \
+ /* Set up pointers for next iteration. First determine whether \
+ left and right partitions are below the threshold size. If so, \
+ ignore one or both. Otherwise, push the larger partition's \
+ bounds on the stack and continue sorting the smaller one. */ \
+ \
+ if (_right_ptr - _lo <= _QSORT_MAX_THRESH) { \
+ if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \
+ /* Ignore both small partitions. */ \
+ _QSORT_POP (_lo, _hi, _top); \
+ else \
+ /* Ignore small left partition. */ \
+ _lo = _left_ptr; \
+ } \
+ else if (_hi - _left_ptr <= _QSORT_MAX_THRESH) \
+ /* Ignore small right partition. */ \
+ _hi = _right_ptr; \
+ else if (_right_ptr - _lo > _hi - _left_ptr) { \
+ /* Push larger left partition indices. */ \
+ _QSORT_PUSH (_top, _lo, _right_ptr); \
+ _lo = _left_ptr; \
+ } \
+ else { \
+ /* Push larger right partition indices. */ \
+ _QSORT_PUSH (_top, _left_ptr, _hi); \
+ _hi = _right_ptr; \
+ } \
+ } \
+ } \
+ \
+ /* Once the BASE array is partially sorted by quicksort the rest \
+ is completely sorted using insertion sort, since this is efficient \
+ for partitions below MAX_THRESH size. BASE points to the \
+ beginning of the array to sort, and END_PTR points at the very \
+ last element in the array (*not* one beyond it!). */ \
+ \
+ { \
+ QSORT_TYPE *const _end_ptr = _base + _elems - 1; \
+ QSORT_TYPE *_tmp_ptr = _base; \
+ register QSORT_TYPE *_run_ptr; \
+ QSORT_TYPE *_thresh; \
+ \
+ _thresh = _base + _QSORT_MAX_THRESH; \
+ if (_thresh > _end_ptr) \
+ _thresh = _end_ptr; \
+ \
+ /* Find smallest element in first threshold and place it at the \
+ array's beginning. This is the smallest array element, \
+ and the operation speeds up insertion sort's inner loop. */ \
+ \
+ for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) \
+ if (QSORT_LT (_run_ptr, _tmp_ptr)) \
+ _tmp_ptr = _run_ptr; \
+ \
+ if (_tmp_ptr != _base) \
+ _QSORT_SWAP (_tmp_ptr, _base, _hold); \
+ \
+ /* Insertion sort, running from left-hand-side \
+ * up to right-hand-side. */ \
+ \
+ _run_ptr = _base + 1; \
+ while (++_run_ptr <= _end_ptr) { \
+ _tmp_ptr = _run_ptr - 1; \
+ while (QSORT_LT (_run_ptr, _tmp_ptr)) \
+ --_tmp_ptr; \
+ \
+ ++_tmp_ptr; \
+ if (_tmp_ptr != _run_ptr) { \
+ QSORT_TYPE *_trav = _run_ptr + 1; \
+ while (--_trav >= _run_ptr) { \
+ QSORT_TYPE *_hi; QSORT_TYPE *_lo; \
+ _hold = *_trav; \
+ \
+ for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) \
+ *_hi = *_lo; \
+ *_hi = _hold; \
+ } \
+ } \
+ } \
+ } \
+ \
+}
* http://en.wikipedia.org/wiki/FLAGS_register_(computing)
*/
#ifdef SRL_X86_OR_X64_CPU
-# if __x86_64__ || __x86_64
+# if defined(__x86_64__) || defined(__x86_64)
# define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%rsp)\npopf")
-# elif __i386__ || __i386
+# elif defined(__i386__) || defined(__i386)
# define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%esp)\npopf")
# endif
#else
#include "ptable.h"
#include "srl_buffer.h"
#include "srl_compress.h"
+#include "qsort.h"
/* The ENABLE_DANGEROUS_HACKS (passed through from ENV via Makefile.PL) enables
* optimizations that may make the code so cozy with a particular version of the
SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys);
SRL_STATIC_INLINE void srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src);
SRL_STATIC_INLINE void srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src);
-SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement);
+SRL_STATIC_INLINE int srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement);
SRL_STATIC_INLINE SV *srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent);
SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc);
SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc);
? srl_init_weak_hash(enc) \
: (enc)->weak_seenhash )
+#define SRL_GET_WEAK_SEENHASH_OR_NULL(enc) ((enc)->weak_seenhash)
+
#define SRL_GET_FREEZEOBJ_SVHASH(enc) ( (enc)->freezeobj_svhash == NULL \
? srl_init_freezeobj_svhash(enc) \
: (enc)->freezeobj_svhash )
srl_empty_encoder_struct(pTHX)
{
srl_encoder_t *enc;
- Newx(enc, 1, srl_encoder_t);
+ Newxz(enc, 1, srl_encoder_t);
if (enc == NULL)
croak("Out of memory");
croak("Out of memory");
}
- /* Set the tmp buffer struct's char buffer to NULL so we don't free
- * something nasty if it's unused. */
- enc->tmp_buf.start = NULL;
-
enc->protocol_version = SRL_PROTOCOL_VERSION;
- enc->recursion_depth = 0;
enc->max_recursion_depth = DEFAULT_MAX_RECUR_DEPTH;
- enc->operational_flags = 0;
- /*enc->flags = 0;*/ /* to be set elsewhere */
-
- enc->weak_seenhash = NULL;
- enc->str_seenhash = NULL;
- enc->ref_seenhash = NULL;
- enc->snappy_workmem = NULL;
- enc->string_deduper_hv = NULL;
-
- enc->freezeobj_svhash = NULL;
- enc->sereal_string_sv = NULL;
- enc->scratch_sv = NULL;
return enc;
}
my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_SORT_KEYS);
if ( !val )
my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL);
- if ( val && SvTRUE(val) )
+ if ( val && SvTRUE(val) ) {
SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS);
+ if (SvIV(val) > 1) {
+ SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL);
+ if (SvIV(val) > 2) {
+ SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV);
+ }
+ }
+ }
my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL_REFS);
if ( !val )
}
/* Outputs a bless header and the class name (as some form of string or COPY).
- * Caller then has to output the actual reference payload. */
-SRL_STATIC_INLINE void
+ * Caller then has to output the actual reference payload.
+ * If it returns 1 it means the classname was written out and should NOT
+ * be overwritten by the ref rewrite logic (which handles REFP).
+ * If it returns 0 it means no classname was output. */
+SRL_STATIC_INLINE int
srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement)
{
/* Check that we actually want to support objects */
"using Sereal::Encoder was explicitly disabled using the "
"'croak_on_bless' option.");
} else if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS) )) {
- return;
+ return 0;
} else {
const HV *stash = SvSTASH(referent);
PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
- const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
+ svtype svt= SvTYPE(referent);
+ int is_av_or_hv= (svt == SVt_PVAV || svt== SVt_PVHV);
+ ptrdiff_t oldoffset= is_av_or_hv
+ ? 0
+ : (ptrdiff_t)PTABLE_fetch(string_seenhash, referent);
+
+ if (oldoffset) {
+ return 0;
+ } else {
+ svt= replacement ? SvTYPE(replacement) : SvTYPE(referent);
+ if (SRL_UNSUPPORTED_SvTYPE(svt)) {
+ return 0;
+ }
+ oldoffset= (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
+ }
if (oldoffset != 0) {
/* Issue COPY instead of literal class name string */
srl_dump_pv(aTHX_ enc, class_name, len, 0);
#endif
}
+ if (is_av_or_hv) {
+ return 0;
+ } else {
+ /* use the string_seenhash to track which items we have seen before */
+ PTABLE_store(string_seenhash, (void *)referent, INT2PTR(void *, BODY_POS_OFS(&enc->buf)));
+ return 1;
+ }
}
+ return 0;
}
SRL_STATIC_INLINE void
srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc)
{
- PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH(enc);
- PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash);
- PTABLE_ENTRY_t *ent;
-
- /* we now walk the weak_seenhash and set any tags it points
- * at to the PAD opcode, this basically turns the first weakref
- * we encountered into a normal ref when there is only a weakref
- * pointing at the structure. */
- while ( NULL != (ent = PTABLE_iter_next(it)) ) {
- const ptrdiff_t offset = (ptrdiff_t)ent->value;
- if ( offset ) {
- srl_buffer_char *pos = enc->buf.body_pos + offset;
- assert(*pos == SRL_HDR_WEAKEN);
- if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset);
- *pos = SRL_HDR_PAD;
+ PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH_OR_NULL(enc);
+ if (!weak_seenhash)
+ return;
+
+ {
+ PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash);
+ PTABLE_ENTRY_t *ent;
+
+ /* we now walk the weak_seenhash and set any tags it points
+ * at to the PAD opcode, this basically turns the first weakref
+ * we encountered into a normal ref when there is only a weakref
+ * pointing at the structure. */
+ while ( NULL != (ent = PTABLE_iter_next(it)) ) {
+ const ptrdiff_t offset = (ptrdiff_t)ent->value;
+ if ( offset ) {
+ srl_buffer_char *pos = enc->buf.body_pos + offset;
+ assert(*pos == SRL_HDR_WEAKEN);
+ if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset);
+ *pos = SRL_HDR_PAD;
+ }
}
- }
- PTABLE_iter_free(it);
+ PTABLE_iter_free(it);
+ }
}
}
}
-/* compare hash entries, used when all keys are bytestrings */
-static int
-he_cmp_fast(const void *a_, const void *b_)
+SRL_STATIC_INLINE void
+srl_dump_hv_unsorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, UV n)
{
- /* even though we are called as a callback from qsort there is
- * no need for a dTHX here, we don't use anything that needs it */
- int cmp;
+ HE *he;
+ const int do_share_keys = HvSHAREKEYS((SV *)src);
+ HE **he_ptr= HvARRAY(src);
+ HE **he_end= he_ptr + HvMAX(src) + 1;
- HE *a = *(HE **)a_;
- HE *b = *(HE **)b_;
+ do {
+ for (he= *he_ptr++; he; he= HeNEXT(he) ) {
+ SV *v= HeVAL(he);
+ if (v != &PL_sv_placeholder) {
+ srl_dump_hk(aTHX_ enc, he, do_share_keys);
+ CALL_SRL_DUMP_SV(enc, v);
+ if (--n == 0) {
+ he_ptr= he_end;
+ break;
+ }
+ }
+ }
+ } while ( he_ptr < he_end );
+}
+
+SRL_STATIC_INLINE void
+srl_dump_hv_unsorted_mg(pTHX_ srl_encoder_t *enc, HV *src, const UV n)
+{
+ HE *he;
+ UV i= 0;
+ const int do_share_keys = HvSHAREKEYS((SV *)src);
+
+ (void)hv_iterinit(src); /* return value not reliable according to API docs */
+ while ((he = hv_iternext(src))) {
+ SV *v;
+ if (expect_false( i == n ))
+ croak("Panic: cannot serialize a tied hash which changes its size!");
+ v= hv_iterval(src, he);
+ srl_dump_hk(aTHX_ enc, he, do_share_keys);
+ CALL_SRL_DUMP_SV(enc, v);
+ ++i;
+ }
+ if (expect_false( i != n ))
+ croak("Panic: cannot serialize a tied hash which changes its size!");
+}
+
+/* sorting hashes - nothing in perl is easy. ever.
+ *
+ * Some things to keep in mind about perl hashes as you read this code:
+ *
+ * Hashes may be shared or not. Usually shared. This means they share their
+ * key data via PL_strtab.
+ *
+ * Hashes may be tied or not. Usually not. When tied the keys from the hash
+ * are available only as SV *'s, and when untied, the keys from the hash are
+ * accessed via HE *'s.
+ *
+ * Some HE's actually contains SV's but most contain a ptr/len combo with
+ * an utf8 flag. To make things even more interesting utf8 keys are
+ * normalized to latin1 by perl where possible before being stored in the HE,
+ * with the utf8 flag indicating "was utf8" instead of "is utf8" or "not utf8".
+ *
+ * The complexity about accessing the key for a hash can be managed away by
+ * perl via API's like hv_iterkeysv(), but using that means constructing mortal
+ * SV's for each key as we go.
+ *
+ * We could in theory use the HePV() interface, but one annoying result of the
+ * "was utf8" logic is that we can't use a sort comparator which looks
+ * at the raw binary of the keys when the keys might contain utf8. A utf8 key
+ * like "\xDF" will be downgraded to ascii in the HE form, but will be upgraded
+ * to the utf8 representation in the SV form. So if we want to do "fast" sorting
+ * we have to restrict it to non-utf8/non-sv keys, and force the use of the SV
+ * based API (which we have to use for tie's anyway) when we see a UTF8 key.
+ *
+ * Which is what we do below. In order to sort a hash we need to construct an
+ * array of its contents, in srl_dump_sorted_nomg() we walk the hash, checking
+ * each key, and copying each HE over into a scratch buffer which it then sorts.
+ * If during the transcription process it sees any utf8 or SV keys it exits
+ * immediately, and falls through to srl_dump_sort_mg(), which uses hv_iterkeysv()
+ * to construct an array of HE_SV instead, which we then sort.
+ */
- STRLEN la = HeKLEN (a);
- STRLEN lb = HeKLEN (b);
- if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la)))
- cmp = lb - la;
- return cmp;
+SRL_STATIC_INLINE int
+he_islt(const HE *a, const HE *b)
+{
+ /* no need for a dTHX here, we don't use anything that needs it */
+ const STRLEN la = HeKLEN(a);
+ const STRLEN lb = HeKLEN(b);
+ const int cmp = memcmp(HeKEY(a), HeKEY(b), la < lb ? la : lb);
+ if (cmp) {
+ return cmp < 0;
+ } else {
+ return la < lb;
+ }
}
-/* compare hash entries, used when some keys are sv's or utf8 */
-static int
-he_cmp_slow(const void *a, const void *b)
+SRL_STATIC_INLINE int
+he_sv_islt_fast(const HE_SV *a, const HE_SV *b)
{
- /* we are called as a callback from qsort, so no pTHX
- * is possible in our argument signature, so we need to do a
- * dTHX; here ourselves. */
- dTHX;
- return sv_cmp( HeSVKEY_force( *(HE **)b), HeSVKEY_force( *(HE **)a ) );
+ /* no need for a dTHX here, we don't use anything that needs it */
+ char *a_ptr;
+ char *b_ptr;
+ int a_isutf8;
+ int b_isutf8;
+ const STRLEN a_len= a->key.sv ? SvCUR(a->key.sv) : HeKLEN(a->val.he);
+ const STRLEN b_len= b->key.sv ? SvCUR(b->key.sv) : HeKLEN(b->val.he);
+ if (a_len != b_len) {
+ return a_len < b_len;
+ }
+ a_isutf8= (a->key.sv ? SvUTF8(a->key.sv) : HeKUTF8(a->val.he)) ? 0 : 1;
+ b_isutf8= (b->key.sv ? SvUTF8(b->key.sv) : HeKUTF8(b->val.he)) ? 0 : 1;
+ if (a_isutf8 != b_isutf8) {
+ return a_isutf8 < b_isutf8;
+ }
+ a_ptr= a->key.sv ? SvPVX(a->key.sv) : HeKEY(a->val.he);
+ b_ptr= b->key.sv ? SvPVX(b->key.sv) : HeKEY(b->val.he);
+ return memcmp(a_ptr, b_ptr, a_len < b_len ? a_len : b_len ) < 0;
}
+#define ISLT_HE_SV(a,b) he_sv_islt_fast( a, b )
+#define ISLT_SV_CMP(a,b) sv_cmp(a->key.sv, b->key.sv) == sort_dir
+
SRL_STATIC_INLINE void
-srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount)
+srl_qsort(pTHX_ srl_encoder_t *enc, const UV n, HE_SV *array)
+{
+ if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) {
+ int sort_dir= SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV) ? 1 : -1;
+ /* hack to forcefully disable "use bytes" */
+ COP cop= *PL_curcop;
+ cop.op_private= 0;
+
+ ENTER;
+ SAVETMPS;
+
+ SAVEVPTR (PL_curcop);
+ PL_curcop= &cop;
+
+ /* now sort */
+ QSORT(HE_SV, array, n, ISLT_SV_CMP);
+
+ FREETMPS;
+ LEAVE;
+ } else {
+ /* now sort */
+ QSORT(HE_SV, array, n, ISLT_HE_SV);
+ }
+}
+
+
+SRL_STATIC_INLINE void
+srl_dump_hv_sorted_sv_slow(pTHX_ srl_encoder_t *enc, HV *src, const UV n, HE_SV *array)
{
HE *he;
+ UV i= 0;
const int do_share_keys = HvSHAREKEYS((SV *)src);
- UV n;
+ const int is_tie= !array;
+
+ /* This sub is used for ties, and for hashes with SV keys in them,
+ * and when the user requests SORT_KEYS_PERL, it is the slowest way
+ * and most memory hungry way to serialize a hash. We will use the
+ * full perl api for extracting the contents of the hash, which fortifies
+ * us against ties, and we will convert all keys into mortal
+ * sv's where necessary. This means we can use sv_cmp on the keys
+ * if we wish.
+ */
- if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) {
- UV i;
- /* for tied hashes, we have to iterate to find the number of entries. Alas... */
- (void)hv_iterinit(src); /* return value not reliable according to API docs */
- n = 0;
- while ((he = hv_iternext(src))) { ++n; }
+ (void)hv_iterinit(src); /* return value not reliable according to API docs */
+ {
+ HE_SV *array_end;
+ if (!array) {
+ Newx(array, n, HE_SV);
+ SAVEFREEPV(array);
+ }
+ array_end= array + n;
+ while ((he = hv_iternext(src))) {
+ if (expect_false( i == n ))
+ croak("Panic: cannot serialize a %s hash which changes its size!",is_tie ? "tied" : "untied");
+ array[i].key.sv= hv_iterkeysv(he);
+ array[i].val.sv= hv_iterval(src,he);
+ i++;
+ }
+ if (expect_false( i != n ))
+ croak("Panic: can not serialize a %s hash which changes it size!", is_tie ? "tied" : "untied");
- BUF_SIZE_ASSERT_HV(&enc->buf, n);
+ srl_qsort(aTHX_ enc, n, array);
- if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
- enc->buf.pos--; /* back up over the previous REFN */
- srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
- } else {
- srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+ while ( array < array_end ) {
+ CALL_SRL_DUMP_SV(enc, array->key.sv);
+ CALL_SRL_DUMP_SV(enc, array->val.sv);
+ array++;
}
+ }
+}
- (void)hv_iterinit(src); /* return value not reliable according to API docs */
- i = 0;
- if (SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS)) {
- HE **he_array;
- int fast = 1;
- Newxz(he_array, n, HE*);
- SAVEFREEPV(he_array);
- while ((he = hv_iternext(src))) {
- if (expect_false( i == n ))
- croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
- he_array[i++]= he;
- if (HeKLEN (he) < 0 || HeKUTF8 (he))
- fast = 0;
- }
- if (expect_false( i != n ))
- croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
- if (fast) {
- qsort(he_array, n, sizeof (HE *), he_cmp_fast);
- } else {
- /* hack to forcefully disable "use bytes" */
- COP cop= *PL_curcop;
- cop.op_private= 0;
-
- ENTER;
- SAVETMPS;
- SAVEVPTR (PL_curcop);
- PL_curcop= &cop;
+SRL_STATIC_INLINE void
+srl_dump_hv_sorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, const UV n)
+{
+ HE *he;
+ const int do_share_keys = HvSHAREKEYS((SV *)src);
- qsort(he_array, n, sizeof (HE *), he_cmp_slow);
+ /* This sub is used only for untied hashes and when the user wants
+ * sorted keys, but not necessarily the order that perl would use.
+ */
- FREETMPS;
- LEAVE;
- }
- for ( i= 0; i < n ; i++ ) {
- SV *v;
- he= he_array[i];
- v= hv_iterval(src, he);
- srl_dump_hk(aTHX_ enc, he, do_share_keys);
- CALL_SRL_DUMP_SV(enc, v);
- }
- } else {
- while ((he = hv_iternext(src))) {
- SV *v;
- if (expect_false( i == n ))
- croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
- v= hv_iterval(src, he);
- srl_dump_hk(aTHX_ enc, he, do_share_keys);
- CALL_SRL_DUMP_SV(enc, v);
- ++i;
+ (void)hv_iterinit(src); /* return value not reliable according to API docs */
+ {
+ HE_SV *array;
+ HE_SV *array_ptr;
+ HE_SV *array_end;
+ Newx(array, n, HE_SV);
+ SAVEFREEPV(array);
+ array_ptr = array;
+ while ((he = hv_iternext(src))) {
+ if ( HeKWASUTF8(he) ) {
+ array_ptr->key.sv= hv_iterkeysv(he);
+ } else {
+ array_ptr->key.sv = HeSVKEY(he);
}
- if (expect_false( i != n ))
- croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
+ array_ptr->val.he = he;
+ array_ptr++;
}
- } else {
+
+ srl_qsort(aTHX_ enc, n, array);
+
+ array_end = array + n;
+ for ( array_end= array + n; array < array_end; array++ ) {
+ SV *v;
+ he = array->val.he;
+ v = hv_iterval(src, he);
+ srl_dump_hk(aTHX_ enc, he, do_share_keys);
+ CALL_SRL_DUMP_SV(enc, v);
+ }
+ }
+}
+
+SRL_STATIC_INLINE void
+srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount)
+{
+ HE *he;
+ UV n;
+ if ( SvMAGICAL(src) ) {
+ /* for tied hashes, we have to iterate to find the number of entries. Alas... */
+ n= 0;
+ (void)hv_iterinit(src); /* return value not reliable according to API docs */
+ while ((he = hv_iternext(src))) { ++n; }
+ }
+ else {
n= HvUSEDKEYS(src);
- BUF_SIZE_ASSERT_HV(&enc->buf, n);
- if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
- enc->buf.pos--; /* backup over the previous REFN */
- srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
- } else {
- srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+ }
+
+ BUF_SIZE_ASSERT_HV(&enc->buf, n);
+ if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
+ enc->buf.pos--; /* backup over the previous REFN */
+ srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
+ } else {
+ srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+ }
+
+ if ( n ) {
+ if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) {
+ /* SORT_KEYS_PERL implies SORT_KEYS, but we check for either just to be
+ * careful - yves*/
+ if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS|SRL_F_SORT_KEYS_PERL) ) {
+ srl_dump_hv_sorted_sv_slow(aTHX_ enc, src, n, NULL);
+ }
+ else {
+ srl_dump_hv_unsorted_mg(aTHX_ enc, src, n);
+ }
}
- if (n) {
- HE **he_ptr= HvARRAY(src);
- HE **he_end= he_ptr + HvMAX(src) + 1;
- do {
- for (he= *he_ptr++; he; he= HeNEXT(he) ) {
- SV *v= HeVAL(he);
- if (v != &PL_sv_placeholder) {
- srl_dump_hk(aTHX_ enc, he, do_share_keys);
- CALL_SRL_DUMP_SV(enc, v);
- if (--n == 0) {
- he_ptr= he_end;
- break;
- }
- }
- }
- } while ( he_ptr < he_end );
+ else {
+ if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) {
+ srl_dump_hv_sorted_nomg(aTHX_ enc, src, n);
+ }
+ else {
+ srl_dump_hv_unsorted_nomg(aTHX_ enc, src, n);
+ }
}
}
}
+
SRL_STATIC_INLINE void
srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys)
{
#if defined(MODERN_REGEXP) && defined(REGEXP_NO_LONGER_POK)
/* Only need to enter here if we have rather modern regexps AND they're
* NO LONGER POK (5.17.6 and up). */
- if (expect_false( svt == SVt_REGEXP ) ) {
+ if ( expect_false( svt == SVt_REGEXP ) ) {
srl_dump_regexp(aTHX_ enc, src);
}
else
ref_rewrite_pos= BODY_POS_OFS(&enc->buf);
- if (expect_false( sv_isobject(src) )) {
+ if ( expect_false( sv_isobject(src) ) ) {
/* Write bless operator with class name */
replacement= srl_get_frozen_object(aTHX_ enc, src, referent);
- srl_dump_classname(aTHX_ enc, referent, replacement); /* 1 == have freeze call */
+ if (srl_dump_classname(aTHX_ enc, referent, replacement)) {
+ /* 1 means we should not rewrite away the classname */
+ ref_rewrite_pos= BODY_POS_OFS(&enc->buf);
+ }
}
srl_buf_cat_char(&enc->buf, SRL_HDR_REFN);
srl_dump_av(aTHX_ enc, (AV *)src, refcount);
}
else
- if (!SvOK(src)) { /* undef and weird shit */
- if ( svt > SVt_PVMG ) { /* we exclude magic, because magic sv's can be undef too */
+ if ( ! SvOK(src) ) { /* undef and weird shit */
+ if ( SRL_UNSUPPORTED_SvTYPE(svt) ) {
+ /* we exclude magic, because magic sv's can be undef too */
/* called when we find an unsupported type/reference. May either throw exception
* or write ONE (nested or single) item to the buffer. */
-#define SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos) \
+#define SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos) \
STMT_START { \
if ( SRL_ENC_HAVE_OPTION((enc), SRL_F_UNDEF_UNKNOWN) ) { \
if (SRL_ENC_HAVE_OPTION((enc), SRL_F_WARN_UNKNOWN)) \
"by the Sereal encoding format", (svt), sv_reftype((src),0),(src)); \
} \
} STMT_END
- SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos);
+ SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos);
}
else if (src == &PL_sv_undef && enc->protocol_version >= 3 ) {
srl_buf_cat_char(&enc->buf, SRL_HDR_CANONICAL_UNDEF);
}
}
else {
- SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos);
-#undef SRL_HANDLE_UNSUPPORTED_TYPE
+ SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos);
+#undef SRL_HANDLE_UNSUPPORTED_SvTYPE
}
--enc->recursion_depth;
}
UV recursion_depth; /* current Perl-ref recursion depth */
ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */
ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */
- ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */
+ ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys)
+ * for now this is also coopted to track which objects we have dumped as objects,
+ * and to ensure we only output a given object once.
+ * Possibly this should be replaced with freezeobj_svhash, but this works fine.
+ */
ptable_ptr freezeobj_svhash; /* ptr table for tracking objects and their frozen replacments via FREEZE */
HV *string_deduper_hv; /* track strings we have seen before, by content */
U32 hash;
} sv_with_hash;
+typedef struct {
+ union {
+ SV *sv;
+ } key;
+ union {
+ HE *he;
+ SV *sv;
+ } val;
+} HE_SV;
+
/* constructor from options */
srl_encoder_t *srl_build_encoder_struct(pTHX_ HV *opt, sv_with_hash *options);
* set since we otherwise croak. Corresponds to the 'warn_unknown' option. */
#define SRL_F_WARN_UNKNOWN 0x00020UL
-/* WARNING: SRL_F_COMPRESS_SNAPPY 0x00040UL
- * SRL_F_COMPRESS_SNAPPY_INCREMENTAL 0x00080UL
- * SRL_F_COMPRESS_ZLIB 0x00100UL
- * are moved to srl_compress.h */
+/* WARNING:
+ * #define SRL_F_COMPRESS_SNAPPY 0x00040UL
+ * #define SRL_F_COMPRESS_SNAPPY_INCREMENTAL 0x00080UL
+ * #define SRL_F_COMPRESS_ZLIB 0x00100UL
+ * are moved to srl_compress.h
+ */
/* Only meaningful if SRL_F_WARN_UNKNOWN also set. If this one is set, then we don't warn
* if the unsupported item has string overloading. */
/* if set in flags, then do not use ARRAYREF or HASHREF ever */
#define SRL_F_CANONICAL_REFS 0x08000UL
+#define SRL_F_SORT_KEYS_PERL 0x10000UL
+#define SRL_F_SORT_KEYS_PERL_REV 0x20000UL
/* ====================================================================
* oper flags
*/
/* Set while the encoder is in active use / dirty */
#define SRL_OF_ENCODER_DIRTY 1UL
-#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & flag_num)
+#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & (flag_num))
#define SRL_ENC_SET_OPTION(enc, flag_num) STMT_START {(enc)->flags |= (flag_num);}STMT_END
#define SRL_ENC_RESET_OPTION(enc, flag_num) STMT_START {(enc)->flags &= ~(flag_num);}STMT_END
#define SRL_ENC_SV_COPY_ALWAYS 0x00000000UL
#define SRL_ENC_SV_REUSE_MAYBE 0x00000001UL
+#define SRL_UNSUPPORTED_SvTYPE(svt) ( \
+ /* svt == SVt_INVLIST || */ \
+ svt == SVt_PVGV || \
+ svt == SVt_PVCV || \
+ svt == SVt_PVFM || \
+ svt == SVt_PVIO || \
+ 0 )
+
/* by default we do not allow people to build with support for SRL_HDR_LONG_DOUBLE */
#if defined(SRL_ALLOW_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
#define SRL_DO_LONG_DOUBLE 1
#define SRL_RDR_ERROR_PANIC(buf, msg) SRL_RDR_ERRORf1((buf), "Panic: %s", msg);
/* trace functions */
+#ifdef WANT_SRL_RDR_TRACE
#ifdef TRACE_READER
# define SRL_RDR_TRACE(msg, args...) \
fprintf(stderr, "%s:%d:%s(): "msg"\n", __FILE__, __LINE__, __func__, ## args)
#else
# define SRL_RDR_TRACE(msg, args...)
#endif
+#endif
#endif
use strict;
use warnings;
-use Sereal::Decoder;
use Test::More;
use File::Spec;
use lib File::Spec->catdir(qw(t lib));
run_tests( "aliased_dedupe_strings", { aliased_dedupe_strings => 1 } );
done_testing();
} else {
- plan skip_all => 'Did not find right version of encoder';
+ plan skip_all => "Did not find right version of encoder/decoder";
}
sub run_tests {
TODO: {
# we must do this test before we test numeric equivalence
+ no warnings 'numeric';
my $have= ($decoded ^ '1');
my $want= ($tests{$test} ^ '1');
local $TODO = $have ne $want ? "Cannot reliably round trip NIOK flag(s)" : undef;
unless version->new(Test::Deep->VERSION) >= version->new('0.110');
1;
} or do {
- my $error = $@ // "Zombie Error";
+ my $error = $@ || "Zombie Error";
plan skip_all => "We are skipping all our tests because we don't have a suitable Test::Deep here, got error: $error";
};
plan tests => keys(%tests) * 2;
--- /dev/null
+use warnings;
+use strict;
+
+package NewStdHash;
+require Tie::Hash;
+our @ISA = qw(Tie::StdHash);
+
+package main;
+use Test::More;
+use File::Spec;
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+use Sereal::TestSet qw(:all);
+my $u_df= "\xDFu";
+utf8::upgrade($u_df);
+my @keys= (
+ 'foo', 'bar', 'mip', 'xap', 'food', 'fool', 'fools', 'barking', 'bark',
+ $u_df,
+ "\x{df}a",
+ "\x{c3}",
+ "\x{de}",
+ "\x{e0}",
+ "\x{100}",
+ "\x{123}",
+ "\x{c4}\x{80}",
+);
+my $have_decoder= have_encoder_and_decoder();
+if ($have_decoder) {
+ plan tests => 1 + (4 * @keys);
+} else {
+ plan tests => 1;
+}
+
+my $enc = Sereal::Encoder->new({
+ sort_keys => 1,
+});
+
+tie my %new_std_hash, 'NewStdHash';
+my %normal_hash;
+foreach my $i (0..$#keys) {
+ $new_std_hash{$keys[$i]} = $i;
+ $normal_hash{$keys[$i]}= $i;
+}
+
+my $enc_tied = $enc->encode(\%new_std_hash);
+my $enc_normal= $enc->encode(\%normal_hash);
+
+
+is($enc_tied, $enc_normal, "Tied and untied are the same")
+or do {
+ diag "Normal:\n";
+ hobodecode $enc_normal;
+ diag "Tied: \n";
+ hobodecode $enc_tied;
+};
+
+if ($have_decoder) {
+ my $dec= Sereal::Decoder->new();
+ my $dec_tied= $dec->decode($enc_tied);
+ my $dec_normal= $dec->decode($enc_normal);
+ foreach my $i (0..$#keys) {
+ is($dec_tied->{$keys[$i]},$i, "decoded tied");
+ is($dec_normal->{$keys[$i]},$i, "decoded normal");
+ is($new_std_hash{$keys[$i]},$i, "original tied");
+ is($normal_hash{$keys[$i]},$i, "original normal");
+ }
+}
+
#!perl
use strict;
use warnings;
-use Sereal::Encoder qw(:all);
use Data::Dumper;
use File::Spec;
use Scalar::Util qw(blessed);
exit 0;
}
+Sereal::Encoder->import(":all");
Sereal::Decoder->import(":all");
# First, test tied hashes. Expected behaviour: We don't segfault, we don't
plan skip_all => 'Did not find right version of encoder (want 3.001006)';
}
else {
- run_roundtrip_tests("plain_canon", { canonical => 1 });
+ run_roundtrip_tests("plain_canonical", { canonical => 1 });
}
pass();
plan skip_all => 'Did not find right version of encoder (want 3.001006)';
}
else {
- run_roundtrip_tests("plain_canon", { canonical => 1 });
+ run_roundtrip_tests("plain_canonical", { canonical => 1 });
}
pass();
plan skip_all => 'Did not find right version of encoder (want 3.001006)';
}
else {
- run_roundtrip_tests("plain_canon", { canonical => 1 });
+ run_roundtrip_tests("plain_canonical", { canonical => 1 });
}
pass();
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Spec;
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+ plan skip_all => 'Did not find right version of encoder';
+}
+else {
+ run_roundtrip_tests(
+ 'sort_keys', { sort_keys => 2 }
+ );
+}
+
+
+pass();
+done_testing();
+
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Spec;
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+ plan skip_all => 'Did not find right version of encoder';
+}
+else {
+ run_roundtrip_tests(
+ 'sort_keys', { sort_keys => 3 }
+ );
+}
+
+
+pass();
+done_testing();
+
exit(0);
}
}
+use File::Spec;
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+use Sereal::TestSet qw(:all);
use Sereal::Encoder;
plan tests => 1;
use threads;
+use threads::shared;
sub foo {}
SCOPE: {
- my $enc = Sereal::Encoder->new;
+ my $dat= shared_clone([undef]);
+ my $enc = Sereal::Encoder->new;
- my $thr = threads->new(\&foo);
- $thr->join;
+ my $thr = threads->new(\&foo);
+ $thr->join;
+ my $encoded= $enc->encode($dat);
}
pass();
use Config;
use Carp qw(confess);
use Storable qw(dclone);
+use Cwd;
# Dynamically load constants from whatever is being tested
-our ($Class, $ConstClass);
+our ($Class, $ConstClass, $InRepo);
+sub get_git_top_dir {
+ my @dirs = (0, 1, 2, 4);
+ for my $d (@dirs) {
+ my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d);
+ my $gdir = File::Spec->catdir($tdir, '.git');
+ return $tdir
+ if -d $gdir;
+ }
+ return();
+}
+
+BEGIN{
+ if (defined(my $top_dir = get_git_top_dir())) {
+ for my $need ('Encoder', 'Decoder') {
+ my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib");
+ if (-d $blib_dir) {
+ require blib;
+ blib->import($blib_dir);
+ }
+ }
+ $InRepo=1;
+ }
+}
BEGIN {
- if (-e "lib/Sereal/Encoder.pm") {
+ if (-e "lib/Sereal.pm") {
+ $Class = 'Sereal::Encoder';
+ }
+ elsif (-e "lib/Sereal/Encoder.pm") {
$Class = 'Sereal::Encoder';
}
elsif (-e "lib/Sereal/Decoder.pm") {
elsif (-e "lib/Sereal/Splitter.pm") {
$Class = 'Sereal::Splitter';
} else {
- die "Could not find an applicable Sereal constants location";
+ die "Could not find an applicable Sereal constants location (in: ",cwd(),")";
}
$ConstClass = $Class . "::Constants";
eval "use $ConstClass ':all'; 1"
}
-sub get_git_top_dir {
- my @dirs = (0, 1, 2);
- for my $d (@dirs) {
- my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d);
- my $gdir = File::Spec->catdir($tdir, '.git');
- return $tdir
- if -d $gdir;
- }
- return();
-}
sub have_encoder_and_decoder {
my ($min_v)= @_;
my $need = $Class =~ /Encoder/ ? "Decoder" : "Encoder";
my $need_class = "Sereal::$need";
- if (defined(my $top_dir = get_git_top_dir())) {
- my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib");
- if (-d $blib_dir) {
- require blib;
- blib->import($blib_dir);
- }
- }
eval "use $Class; 1"
or do {
note("Could not locate $Class for testing" . ($@ ? " (Exception: $@)" : ""));
};
my $cmp_v = $need_class->VERSION;
if ($min_v and $cmp_v < $min_v) {
- note("Could not load correct version of $need_class for testing "
+ diag("Could not load correct version of $need_class for testing "
."(got: $cmp_v, needed at least $min_v)");
return;
}
$cmp_v = sprintf("%.2f", int($cmp_v*100)/100);
my %compat_versions = map {$_ => 1} $Class->_test_compat();
if (not defined $cmp_v or not exists $compat_versions{$cmp_v}) {
- note("Could not load correct version of $need_class for testing "
+ diag("Could not load correct version of $need_class for testing "
."(got: $cmp_v, needed any of ".join(", ", keys %compat_versions).")");
return();
}
my $sum= $eng0e0 + $eng0e1 + $eng2;
+sub encoder_required {
+ my ($ver, $name)= @_;
+ return "" . ( $Sereal::Encoder::VERSION < $ver ? "TODO " : "") . $name;
+}
sub _get_roundtrip_tests {
my @ScalarRoundtripTests = (
1023,1024,1025,
8191,8192,8193,
)),
- ( map { [ ( $Sereal::Encoder::VERSION < 3.005002 ? "TODO" : "")
- . " troublesome num/strs '$_'", $_ ] } @numstr ),
+ ( map { [ encoder_required(3.005002, " troublesome num/strs '$_'"),
+ $_ ] } @numstr ),
["long latin1 string", "üll" x 10000],
["long utf8 string", do {use utf8; " עדיין חשב" x 10000}],
["long utf8 string with only ascii", do {use utf8; "foo" x 10000}],
[ "undef", [\undef, \undef] ],
);
+ my @blessed_array_check1;
+ $blessed_array_check1[0]= "foo";
+ $blessed_array_check1[1]= bless \$blessed_array_check1[0], "BlessedArrayCheck";
+ $blessed_array_check1[2]= \$blessed_array_check1[0];
+
+ my @blessed_array_check2= (3,0,0,3);
+ $blessed_array_check2[1]= \$blessed_array_check2[0];
+ $blessed_array_check2[2]= \$blessed_array_check2[3];
+ bless \$blessed_array_check2[0], "BlessedArrayCheck";
+ bless \$blessed_array_check2[3], "BlessedArrayCheck";
+
+ my @sc_array=(1,1);
+ $sc_array[0]=bless \$sc_array[1], "BlessedArrayLeft";
+ $sc_array[1]=bless \$sc_array[0], "BlessedArrayRight";
+
+
my @RoundtripTests = (
@ScalarRoundtripTests,
+ [ encoder_required(3.006006,"BlessedArrayCheck 1"), \@blessed_array_check1 ],
+ [ encoder_required(3.006006,"BlessedArrayCheck 2"), \@blessed_array_check2 ],
+ [ encoder_required(3.006006,"Scalar Cross Blessed Array"), \@sc_array ],
["[{foo => 1}, {foo => 2}] - repeated hash keys",
[{foo => 1}, {foo => 2}] ],
eval {$decoded2 = $dec->($encoded2); 1}
or do {
my $err = $@ || 'Zombie error';
- diag("Got error while encoding the second time: $err");
+ diag("Got error while decoding the second time: $err");
+ # hobodecode($encoded2);
};
defined($decoded2) == defined($data)
eval {$decoded3 = $dec->($encoded3); 1}
or do {
my $err = $@ || 'Zombie error';
- diag("Got error while encoding the third time: $err");
+ diag("Got error while decoding the third time: $err");
};
defined($decoded3) == defined($data)