Revision history for Perl extension Sereal-Encoder
+* Warning: For a seamless upgrade, upgrade to version 2
+* of the decoder before upgrading to version 2 of the
+* encoder!
+
+2.03 Tue Jan 7 20:00 2014 (AMS time)
+ - (Hopefully) final fixes to FREEZE/THAW functionality:
+ => Add safe assertion to make sure that we don't segfault on invalid
+ data.
+ => Fix encoding/decoding of data structures with repeated references
+ to the same instance of a class that has FREEZE/THAW hooks.
+ Thanks to Christian Hansen for a test case.
+ - Distribution dependency fix.
+
+2.02 Mon Jan 6 15:00 2014 (AMS time)
+ - Fundamental fixes for FREEZE/THAW support in previous Sereal v2
+ releases. If you plan to use FREEZE/THAW, make sure you have 2.02
+ or better (dito for the decoder).
+
+2.01 Tue Dec 31 08:15 2013 (AMS time)
+ - Promoting changes from 0.37 to 2.00_03 to a stable release.
+ (This being the first protocol v2 stable release.)
+ - Minor performance tweaks.
+
+2.00_03 Sun Dec 29 10:33 2013 (AMS time)
+ - FREEZE/THAW hooks for object serialization.
+ - Test improvements (allowing for partial parallel run)
+ - Minor optimizations.
+
+2.00_02 Mon Oct 28 19:32 2013 (AMS time)
+ - Sereal::Encoder now requires Sereal::Decoder for better testing.
+ - Fix Test::Warn dependency problem of 2.00_01.
+
+2.00_01 tue Oct 1 07:34 2013 (AMS time)
+ - NEW PROTOCOL VERSION: V2
+ - User-data in header functionality: You may embed arbitrary
+ Sereal-serializable data in a document header. The document
+ header isn't compressed, so this is ideal for retrieving
+ small chunks of meta-data (eg. routing information) without
+ having to deserialize the entire document.
+ - Relocatable Sereal document bodies
+ - Encoder never emits non-incremental Snappy encoding for V2
+ - Offsets now 1-based in relocatable format, not 0
+ - Fixed VERY obscure (and rare) memory leak.
+ - Improved error messages
+ - Remove warning about Sereal not being production-grade
+ (because it IS).
+ - Detect when the Snappy compression was net negative in size
+ and back out
+ - C89/Windows fixes (bulk88)
+ - 5.18 compat: Skip test failing due to hash-randomization (Zefram)
+
0.37 Mon Sep 2 07:40 2013 (AMS time)
- Windows and C89 fixes
- Band-aid: Skip test failing due to hash-randomization [Zefram]
srl_destroy_encoder(aTHX_ enc);
void
-encode(enc, src)
+encode(enc, src, ...)
srl_encoder_t *enc;
SV *src;
+ SV *hdr_user_data_src = NULL;
PPCODE:
assert(enc != NULL);
- srl_dump_data_structure(aTHX_ enc, src);
- assert(enc->pos > enc->buf_start);
+ if (items >= 2 && SvOK(ST(2)))
+ hdr_user_data_src = ST(2);
+ srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src);
+ assert(enc->buf.pos > enc->buf.start);
/* We always copy the string since we might reuse the string buffer. That means
* we already have to do a malloc and we might as well use the opportunity to
* allocate only as much memory as we really need to hold the output. */
- ST(0) = sv_2mortal(newSVpvn(enc->buf_start, (STRLEN)BUF_POS_OFS(enc)));
+ ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf)));
XSRETURN(1);
void
PPCODE:
enc = srl_build_encoder_struct(aTHX_ opt);
assert(enc != NULL);
- srl_dump_data_structure(aTHX_ enc, src);
+ srl_dump_data_structure(aTHX_ enc, src, NULL);
/* Avoid copy by stealing string buffer if it is not too large.
* This makes sense in the functional interface since the string
* buffer isn't ever going to be reused. */
- assert(enc->buf_start < enc->pos);
- if (BUF_POS_OFS(enc) > 20 && BUF_SPACE(enc) < BUF_POS_OFS(enc) ) {
+ assert(enc->buf.start < enc->buf.pos);
+ if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) {
/* If not wasting more than 2x memory - FIXME fungible */
SV *sv = sv_2mortal(newSV_type(SVt_PV));
ST(0) = sv;
- SvPV_set(sv, enc->buf_start);
- SvLEN_set(sv, BUF_SIZE(enc));
- SvCUR_set(sv, BUF_POS_OFS(enc));
+ SvPV_set(sv, enc->buf.start);
+ SvLEN_set(sv, BUF_SIZE(enc->buf));
+ SvCUR_set(sv, BUF_POS_OFS(enc->buf));
SvPOK_on(sv);
- enc->buf_start = enc->pos = NULL; /* no need to free these guys now */
+ enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */
}
else {
- ST(0) = sv_2mortal(newSVpvn(enc->buf_start, (STRLEN)BUF_POS_OFS(enc)));
+ ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf)));
}
XSRETURN(1);
+void
+encode_sereal_with_header_data(src, hdr_user_data_src, opt = NULL)
+ SV *src;
+ SV *hdr_user_data_src;
+ HV *opt;
+ PREINIT:
+ srl_encoder_t *enc;
+ PPCODE:
+ if (!SvOK(hdr_user_data_src))
+ hdr_user_data_src = NULL;
+ enc = srl_build_encoder_struct(aTHX_ opt);
+ assert(enc != NULL);
+ srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src);
+ /* Avoid copy by stealing string buffer if it is not too large.
+ * This makes sense in the functional interface since the string
+ * buffer isn't ever going to be reused. */
+ assert(enc->buf.start < enc->buf.pos);
+ if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) {
+ /* If not wasting more than 2x memory - FIXME fungible */
+ SV *sv = sv_2mortal(newSV_type(SVt_PV));
+ ST(0) = sv;
+ SvPV_set(sv, enc->buf.start);
+ SvLEN_set(sv, BUF_SIZE(enc->buf));
+ SvCUR_set(sv, BUF_POS_OFS(enc->buf));
+ SvPOK_on(sv);
+
+ enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */
+ }
+ else {
+ ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf)));
+ }
+ XSRETURN(1);
MODULE = Sereal::Encoder PACKAGE = Sereal::Encoder::Constants
PROTOTYPES: DISABLE
author_tools/bench.pl
+author_tools/freeze_thaw_timing.pl
author_tools/hobodecoder.pl
author_tools/numeric_str_length.c
author_tools/stringify_test.c
snappy/csnappy_internal.h
snappy/csnappy_internal_userspace.h
srl_buffer.h
+srl_buffer_types.h
srl_common.h
srl_encoder.c
srl_encoder.h
t/020_sort_keys.t
t/021_sort_keys_option.t
t/100_roundtrip.t
+t/101_roundtrip_v1.t
t/110_nobless.t
+t/120_hdr_data.t
+t/130_freezethaw.t
t/160_recursion.t
t/200_bulk.t
t/300_fail.t
"File::Path" : "0",
"File::Spec" : "0",
"Scalar::Util" : "0",
+ "Sereal::Decoder" : "2.03",
"Test::LongString" : "0",
"Test::More" : "0.88",
"Test::Warn" : "0"
"url" : "git://github.com/Sereal/Sereal.git"
}
},
- "version" : "0.37"
+ "version" : "2.03"
}
File::Path: 0
File::Spec: 0
Scalar::Util: 0
+ Sereal::Decoder: 2.03
Test::LongString: 0
Test::More: 0.88
Test::Warn: 0
resources:
bugtracker: https://github.com/Sereal/Sereal/issues
repository: git://github.com/Sereal/Sereal.git
-version: 0.37
+version: 2.03
'Test::LongString' => '0',
'Data::Dumper' => '0',
'Test::Warn' => '0',
+ 'Sereal::Decoder' => '2.03',
},
NAME => $module,
VERSION_FROM => 'lib/Sereal/Encoder.pm', # finds $VERSION
use Data::Dumper qw(Dumper);
use Data::Dumper::Limited qw(DumpLimited);
use Data::MessagePack;
+use CBOR::XS qw(encode_cbor decode_cbor);
use Getopt::Long qw(GetOptions);
my (
my @rand = map rand, 1..1000;
our %data;
-$data{$_}= make_data() for qw(sereal sereal_func dd1 dd2 ddl mp json_xs storable sereal_snappy);
+$data{$_}= make_data() for qw(sereal sereal_func dd1 dd2 ddl mp json_xs storable sereal_snappy cbor);
our $enc = Sereal::Encoder->new(\%opt);
our $enc_snappy = Sereal::Encoder->new({%opt, snappy => 1});
our $dec = Sereal::Decoder->new(\%opt);
-our ($json_xs, $dd1, $dd2, $ddl, $sereal, $storable, $mp, $sereal_snappy);
+our ($json_xs, $dd1, $dd2, $ddl, $sereal, $storable, $mp, $sereal_snappy, $cbor);
# do this first before any of the other dumpers "contaminate" the iv/pv issue
$sereal = $enc->encode($data{sereal});
$sereal_snappy = $enc_snappy->encode($data{sereal_snappy});
$dd2 = Dumper($data{dd2});
$ddl = DumpLimited($data{ddl}) if !$medium_data or $nobless;
$mp = $mpo->pack($data{mp}) if !$medium_data or $nobless;
- $storable = nfreeze($data{storable}); # must be last
+ $cbor = encode_cbor($data{cbor}) if !$medium_data or $nobless;
+ $storable = nfreeze($data{storable});
}
print($sereal), exit if $dump;
["JSON::XS", bytes::length($json_xs)],
["Data::Dumper::Limited", bytes::length($ddl)],
["Data::MessagePack", bytes::length($mp)],
+ ["CBOR", bytes::length($cbor)],
)),
["Data::Dumper (1)", bytes::length($dd1)],
["Data::Dumper (2)", bytes::length($dd2)],
json_xs => '$::x = encode_json($::data{json_xs});',
ddl => '$::x = DumpLimited($::data{ddl});',
msgpack => '$::x = $::mpo->pack($::data{mp});',
+ cbor => '$::x = encode_cbor($::data{cbor});',
)),
dd_noindent => '$::x = Data::Dumper->new([$::data{dd1}])->Indent(0)->Dump();',
dd => '$::x = Dumper($::data{dd2});',
json_xs => '$::x = decode_json($::json_xs);',
undump_ddl => '$::x = Data::Undump::undump($::ddl);',
msgpack => '$::x = $::mpo->unpack($::mp);',
+ cbor => '$::x = decode_cbor($::cbor);',
)),
eval_dd => '$::x = eval $::dd1;',
storable => '$::x = thaw($::storable);',
--- /dev/null
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Sereal::Encoder;
+use Sereal::Decoder;
+
+use Benchmark::Dumb qw(cmpthese);
+
+my $enc_nocb = Sereal::Encoder->new();
+my $enc_cb = Sereal::Encoder->new({freeze_callbacks => 1});
+my $dec = Sereal::Decoder->new();
+
+package Foo;
+sub new {
+ my $class = shift;
+ return bless({@_} => $class);
+}
+
+sub FREEZE {
+ my ($self, $serializer) = @_;
+ return $self->{name}; # performance
+}
+
+sub THAW {
+ my ($class, $serializer, $data) = @_;
+ return Foo->new(name => $data);
+}
+
+package main;
+
+my $data = Foo->new(name => "blargh");
+my $data_big = [];
+for (1..100) {
+ push @$data_big, Foo->new(name => "blargh");
+}
+my $data_big_nocb = [];
+for (1..100) {
+ push @$data_big_nocb, bless({name => "blargh"} => "Bar");
+}
+
+my $frozen_nocb = $enc_nocb->encode($data);
+my $frozen_cb = $enc_cb->encode($data);
+
+my $frozen_big_nocb = $enc_nocb->encode($data_big);
+my $frozen_big_cb = $enc_cb->encode($data_big);
+
+my $timing = "1000.01";
+
+print "Comparing small serialization with/out callbacks...\n";
+cmpthese(
+ $timing,
+ {
+ cb => sub {$enc_cb->encode($data)},
+ no_cb => sub {$enc_nocb->encode($data)},
+ }
+);
+
+print "Comparing big serialization with/out callbacks...\n";
+cmpthese(
+ $timing,
+ {
+ cb => sub {$enc_cb->encode($data_big)},
+ no_cb => sub {$enc_nocb->encode($data_big)},
+ cb_nocbdata => sub {$enc_cb->encode($data_big_nocb)},
+ no_cb_nocbdata => sub {$enc_nocb->encode($data_big_nocb)},
+ }
+);
+
+
+print "Comparing small deserialization with/out callbacks...\n";
+cmpthese(
+ $timing,
+ {
+ cb => sub {$dec->decode($frozen_cb)},
+ no_cb => sub {$dec->decode($frozen_nocb)},
+ }
+);
+
+print "Comparing big deserialization with/out callbacks...\n";
+cmpthese(
+ $timing,
+ {
+ cb => sub {$dec->decode($frozen_big_cb)},
+ no_cb => sub {$dec->decode($frozen_big_nocb)},
+ }
+);
' } or die "No encoder/decoder constants: $err\n$@";
}
-GetOptions(
- my $opt = {},
- 'e|stderr',
-);
-
-$| = 1;
-if ($opt->{e}) {
- select(STDERR);
-}
-
-my %const_names = map {$_ => eval "$_"} @Sereal::Constants::EXPORT_OK;
-#print Dumper \%const_names; exit;
-
-local $/ = undef;
-my $data = <STDIN>;
-
-open my $fh, "| od -tu1c" or die $!;
-print $fh $data;
-close $fh;
-
-print "\n\nTotal length: " . length($data) . "\n\n";
-
-my $indent = "";
my $done;
-parse_header();
-while (length $data) {
- my $done = parse_sv("");
-}
+my $data;
+my $hlen;
+my $indent = "";
+my %const_names = map {$_ => eval "$_"} @Sereal::Constants::EXPORT_OK;
sub parse_header {
$data =~ s/^(=srl)(.)// or die "invalid header: $data";
print "Sereal protocol version: $proto_version\n";
if (length($hdr)) {
print "Header($len): " . join(" ", map ord, split //, $hdr) . "\n";
+ if ($proto_version >= 2 && (ord(substr($hdr, 0, 1)) & 1) ) { # if first bit set => user header data
+ print "Found user data in header:\n";
+ my $tmp_data = $data; # dance necessary because $data is treated as a global :( hobo, hobo, hobo!
+ $data = substr($hdr, 1);
+ parse_sv(" ");
+ $data = $tmp_data;
+ print "End of user data in header. Body:\n";
+ }
}
else {
print "Empty Header.\n";
my $out = Compress::Snappy::decompress($data);
$data = $out;
} elsif ($encoding == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL) {
- die "Incremental Snappy encoding not implemented in hobodecoder. (yet. It's easy to do.)";
- }
- elsif ($encoding) {
+ print "Header says: Document body is Snappy-compressed (incremental).\n";
+ my $compressed_len = varint();
+ require Compress::Snappy;
+ my $out = Compress::Snappy::decompress($data);
+ $data = $out;
+ } elsif ($encoding) {
die "Invalid encoding '" . ($encoding >> SRL_PROTOCOL_VERSION_BITS) . "'";
}
+ $hlen= length($done);
}
my ($len_f, $len_d, $len_D);
return unpack("D",$v);
}
+my $fmt1= "%06u/%06u: %02x%1s %03s %s";
+my $fmt2= "%-6s %-6s %-2s%1s %-3s %s";
+my $lead_items= 5; # 1 less than the fmt2
sub parse_sv {
my ($ind) = @_;
my $bv= $o;
my $high = $o > 128;
$o -= 128 if $high;
+ printf $fmt1, $p, $p-$hlen+1, $o, $high ? '*' : ' ', $bv, $ind;
+
if ($o == SRL_HDR_VARINT) {
- printf "%06u: %02x %03s %sVARINT: %u\n", $p, $o, $bv, $ind, varint();
+ printf "VARINT: %u\n", varint();
}
elsif (SRL_HDR_POS_LOW <= $o && $o <= SRL_HDR_POS_HIGH) {
- printf "%06u: %02x %03s %sPOS: %u\n", $p, $o, $bv, $ind, $o;
+ printf "POS: %u\n", $o;
}
elsif (SRL_HDR_NEG_LOW <= $o && $o <= SRL_HDR_NEG_HIGH) {
$o = $o - 32;
- printf "%06u: %02x %03s %sNEG: %i\n", $p, $o, $bv, $ind, $o;
+ printf "NEG: %i\n", $o;
}
elsif ($o >= SRL_HDR_SHORT_BINARY_LOW) {
$o -= SRL_HDR_SHORT_BINARY_LOW;
my $len = $o;
my $str = substr($data, 0, $len, '');
$done .= $str;
- printf "%06u: %02x %03s %sSHORT_BINARY(%u): '%s'\n", $p, $o, $bv, $ind, $len, $str;
+ printf "SHORT_BINARY(%u): '%s'\n", $len, $str;
}
elsif ($o == SRL_HDR_BINARY || $o == SRL_HDR_STR_UTF8) {
my $l = varint();
my $str = substr($data, 0, $l, ""); # fixme UTF8
$done .= $str;
- printf "%06u: %02x %03s %s".($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $p, $o, $bv, $ind, $l, $str;
+ printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $l, $str);
}
elsif ($o == SRL_HDR_FLOAT) {
- printf "%06u: %02x %03s %sFLOAT(%f)\n", $p, $o, $bv, $ind, parse_float();
+ printf "FLOAT(%f)\n", parse_float();
}
elsif ($o == SRL_HDR_DOUBLE) {
- printf "%06u: %02x %03s %sDOUBLE(%f)\n", $p, $o, $bv, $ind, parse_double();
+ printf "DOUBLE(%f)\n", parse_double();
}
elsif ($o == SRL_HDR_LONG_DOUBLE) {
- printf "%06u: %02x %03s %sLONG_DOUBLE(%f)\n", $p, $o, $bv, $ind, parse_long_double();
+ printf "LONG_DOUBLE(%f)\n", parse_long_double();
}
elsif ($o == SRL_HDR_REFN) {
- printf "%06u: %02x %03s %sREFN\n", $p, $o, $bv, $ind;
+ printf "REFN\n";
parse_sv($ind . " ");
}
elsif ($o == SRL_HDR_REFP) {
my $len = varint();
- printf "%06u: %02x %03s %sREFP(%u)\n", $p, $o, $bv, $ind, $len;
+ printf "REFP(%u)\n", $len;
}
elsif ($o == SRL_HDR_COPY) {
my $len = varint();
- printf "%06u: %02x %03s %sCOPY(%u)\n", $p, $o, $bv, $ind, $len;
+ printf "COPY(%u)\n", $len;
}
elsif (SRL_HDR_ARRAYREF_LOW <= $o && $o <= SRL_HDR_ARRAYREF_HIGH) {
- printf "%06u: %02x %03s %sARRAYREF", $p, $o, $bv, $ind;
+ printf "ARRAYREF";
parse_av($ind,$o);
}
elsif ($o == SRL_HDR_ARRAY) {
- printf "%06u: %02x %03s %sARRAY", $p, $o, $bv, $ind;
+ printf "ARRAY";
parse_av($ind);
}
elsif (SRL_HDR_HASHREF_LOW <= $o && $o <= SRL_HDR_HASHREF_HIGH) {
- printf "%06u: %02x %03s %sHASHREF", $p, $o, $bv, $ind;
+ printf "HASHREF";
parse_hv($ind,$o);
}
elsif ($o == SRL_HDR_HASH) {
- printf "%06u: %02x %03s %sHASH", $p, $o, $bv, $ind;
+ printf "HASH";
parse_hv($ind);
}
elsif ($o == SRL_HDR_UNDEF) {
- printf "%06u: %02x %03s %sUNDEF\n", $p, $o, $bv, $ind;
+ printf "UNDEF\n";
}
elsif ($o == SRL_HDR_WEAKEN) {
- printf "%06u: %02x %03s %sWEAKEN\n", $p, $o, $bv, $ind;
+ printf "WEAKEN\n";
parse_sv($ind);
}
elsif ($o == SRL_HDR_PAD) {
- printf "%06u: %02x %03s %sPAD\n", $p, $o, $bv, $ind;
+ printf "PAD\n";
}
elsif ($o == SRL_HDR_ALIAS) {
my $ofs= varint();
- printf "%06u: %02x %03s %sALIAS(%u)\n", $p, $o, $bv, $ind, $ofs;
+ printf "ALIAS(%u)\n", $ofs;
}
elsif ($o == SRL_HDR_OBJECTV) {
my $ofs= varint();
- printf "%06u: %02x %03s %sOBJECTV(%d)\n", $p, $o, $bv, $ind, $ofs;
- printf "%6s %2s %3s %s Value:\n",("") x 3, $ind." ";
+ printf "OBJECTV(%d)\n", $ofs;
+ printf "$fmt2 Value:\n",("") x $lead_items, $ind;
+ parse_sv($ind." ");
+ }
+ elsif ($o == SRL_HDR_OBJECTV_FREEZE) {
+ my $ofs= varint();
+ printf "OBJECTV_FREEZE(%d)\n", $ofs;
+ printf "$fmt2 Value:\n",("") x $lead_items, $ind;
parse_sv($ind." ");
}
elsif ($o == SRL_HDR_OBJECT) {
- printf "%06u: %02x %03s %sOBJECT\n", $p, $o, $bv, $ind;
- printf "%6s %2s %3s %s Class:\n",("") x 3, $ind." ";
+ printf "OBJECT\n";
+ printf "$fmt2 Class:\n",("") x $lead_items, $ind;
parse_sv($ind." ");
- printf "%6s %2s %3s %s Value:\n",("") x 3, $ind." ";
+ printf "$fmt2 Value:\n",("") x $lead_items, $ind;
+ parse_sv($ind." ");
+ }
+ elsif ($o == SRL_HDR_OBJECT_FREEZE) {
+ printf "OBJECT_FREEZE\n";
+ printf "$fmt2 Class:\n",("") x $lead_items, $ind;
+ parse_sv($ind." ");
+ printf "$fmt2 Value:\n",("") x $lead_items, $ind;
parse_sv($ind." ");
}
elsif ($o == SRL_HDR_REGEXP) {
- printf "%06u: %02x %03s %sREGEXP\n", $p, $o, $bv, $ind;
+ printf "REGEXP\n";
parse_sv($ind." ");
parse_sv($ind." ");
}
elsif ($o == SRL_HDR_FALSE) {
- printf "%06u: %02x %03s %sFALSE\n", $p, $o, $bv, $ind;
+ printf "FALSE\n";
}
elsif ($o == SRL_HDR_TRUE) {
- printf "%06u: %02x %03s %sTRUE\n", $p, $o, $bv, $ind;
+ printf "TRUE\n";
}
else {
+ printf "<UNKNOWN>\n";
die "unsupported type: $o ($t): $const_names{$o}";
}
return 0;
}
return $x;
}
+
+GetOptions(
+ my $opt = {},
+ 'e|stderr',
+);
+
+$| = 1;
+if ($opt->{e}) {
+ select(STDERR);
+}
+
+#print Dumper \%const_names; exit;
+
+local $/ = undef;
+$data = <STDIN>;
+
+open my $fh, "| od -tu1c" or die $!;
+print $fh $data;
+close $fh;
+
+print "\n\nTotal length: " . length($data) . "\n\n";
+
+parse_header();
+while (length $data) {
+ $done = parse_sv("");
+}
return PERL_constant_NOTFOUND;
}
+static int
+constant_21 (pTHX_ const char *name, IV *iv_return) {
+ /* When generated this function returned values for the list of names given
+ here. However, subsequent manual editing may have added or removed some.
+ SRL_HDR_ARRAYREF_HIGH SRL_HDR_OBJECT_FREEZE SRL_HDR_RESERVED_HIGH */
+ /* Offset 13 gives the best switch position. */
+ switch (name[13]) {
+ case 'R':
+ if (memEQ(name, "SRL_HDR_ARRAYREF_HIGH", 21)) {
+ /* ^ */
+#ifdef SRL_HDR_ARRAYREF_HIGH
+ *iv_return = SRL_HDR_ARRAYREF_HIGH;
+ return PERL_constant_ISIV;
+#else
+ return PERL_constant_NOTDEF;
+#endif
+ }
+ break;
+ case 'T':
+ if (memEQ(name, "SRL_HDR_OBJECT_FREEZE", 21)) {
+ /* ^ */
+#ifdef SRL_HDR_OBJECT_FREEZE
+ *iv_return = SRL_HDR_OBJECT_FREEZE;
+ return PERL_constant_ISIV;
+#else
+ return PERL_constant_NOTDEF;
+#endif
+ }
+ break;
+ case 'V':
+ if (memEQ(name, "SRL_HDR_RESERVED_HIGH", 21)) {
+ /* ^ */
+#ifdef SRL_HDR_RESERVED_HIGH
+ *iv_return = SRL_HDR_RESERVED_HIGH;
+ return PERL_constant_ISIV;
+#else
+ return PERL_constant_NOTDEF;
+#endif
+ }
+ break;
+ }
+ return PERL_constant_NOTFOUND;
+}
+
static int
constant_25 (pTHX_ const char *name, IV *iv_return) {
/* When generated this function returned values for the list of names given
here. However, subsequent manual editing may have added or removed some.
SRL_HDR_SHORT_BINARY_HIGH SRL_MASK_SHORT_BINARY_LEN
- SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_VERSION_BITS
- SRL_PROTOCOL_VERSION_MASK */
+ SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_HDR_CONTINUE
+ SRL_PROTOCOL_VERSION_BITS SRL_PROTOCOL_VERSION_MASK */
/* Offset 24 gives the best switch position. */
switch (name[24]) {
+ case 'E':
+ if (memEQ(name, "SRL_PROTOCOL_HDR_CONTINU", 24)) {
+ /* E */
+#ifdef SRL_PROTOCOL_HDR_CONTINUE
+ *iv_return = SRL_PROTOCOL_HDR_CONTINUE;
+ return PERL_constant_ISIV;
+#else
+ return PERL_constant_NOTDEF;
+#endif
+ }
+ break;
case 'H':
if (memEQ(name, "SRL_HDR_SHORT_BINARY_HIG", 24)) {
/* H */
SRL_HDR_FLOAT SRL_HDR_HASH SRL_HDR_HASHREF SRL_HDR_HASHREF_HIGH
SRL_HDR_HASHREF_LOW SRL_HDR_LONG_DOUBLE SRL_HDR_MANY SRL_HDR_NEG
SRL_HDR_NEG_HIGH SRL_HDR_NEG_LOW SRL_HDR_OBJECT SRL_HDR_OBJECTV
+ SRL_HDR_OBJECTV_FREEZE SRL_HDR_OBJECT_FREEZE
SRL_HDR_PACKET_START SRL_HDR_PAD SRL_HDR_POS SRL_HDR_POS_HIGH
SRL_HDR_POS_LOW SRL_HDR_REFN SRL_HDR_REFP SRL_HDR_REGEXP
SRL_HDR_RESERVED SRL_HDR_RESERVED_HIGH SRL_HDR_RESERVED_LOW
SRL_MASK_SHORT_BINARY_LEN SRL_NEG_MIN_SIZE SRL_POS_MAX_SIZE
SRL_PROTOCOL_ENCODING_MASK SRL_PROTOCOL_ENCODING_RAW
SRL_PROTOCOL_ENCODING_SNAPPY
- SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL SRL_PROTOCOL_VERSION
- SRL_PROTOCOL_VERSION_BITS SRL_PROTOCOL_VERSION_MASK));
+ SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL
+ SRL_PROTOCOL_HDR_CONTINUE SRL_PROTOCOL_HDR_USER_DATA
+ SRL_PROTOCOL_VERSION SRL_PROTOCOL_VERSION_BITS
+ SRL_PROTOCOL_VERSION_MASK));
print constant_types(), "\n"; # macro defs
foreach (C_constant ("Sereal::Encoder::Constants", 'constant', 'IV', $types, undef, 3, @names) ) {
return constant_20 (aTHX_ name, iv_return);
break;
case 21:
- /* Names all of length 21. */
- /* SRL_HDR_ARRAYREF_HIGH SRL_HDR_RESERVED_HIGH */
- /* Offset 10 gives the best switch position. */
- switch (name[10]) {
+ return constant_21 (aTHX_ name, iv_return);
+ break;
+ case 22:
+ /* Names all of length 22. */
+ /* SRL_HDR_OBJECTV_FREEZE SRL_MASK_HASHREF_COUNT */
+ /* Offset 6 gives the best switch position. */
+ switch (name[6]) {
case 'R':
- if (memEQ(name, "SRL_HDR_ARRAYREF_HIGH", 21)) {
- /* ^ */
-#ifdef SRL_HDR_ARRAYREF_HIGH
- *iv_return = SRL_HDR_ARRAYREF_HIGH;
+ if (memEQ(name, "SRL_HDR_OBJECTV_FREEZE", 22)) {
+ /* ^ */
+#ifdef SRL_HDR_OBJECTV_FREEZE
+ *iv_return = SRL_HDR_OBJECTV_FREEZE;
return PERL_constant_ISIV;
#else
return PERL_constant_NOTDEF;
}
break;
case 'S':
- if (memEQ(name, "SRL_HDR_RESERVED_HIGH", 21)) {
- /* ^ */
-#ifdef SRL_HDR_RESERVED_HIGH
- *iv_return = SRL_HDR_RESERVED_HIGH;
+ if (memEQ(name, "SRL_MASK_HASHREF_COUNT", 22)) {
+ /* ^ */
+#ifdef SRL_MASK_HASHREF_COUNT
+ *iv_return = SRL_MASK_HASHREF_COUNT;
return PERL_constant_ISIV;
#else
return PERL_constant_NOTDEF;
break;
}
break;
- case 22:
- if (memEQ(name, "SRL_MASK_HASHREF_COUNT", 22)) {
-#ifdef SRL_MASK_HASHREF_COUNT
- *iv_return = SRL_MASK_HASHREF_COUNT;
- return PERL_constant_ISIV;
-#else
- return PERL_constant_NOTDEF;
-#endif
- }
- break;
case 23:
if (memEQ(name, "SRL_MASK_ARRAYREF_COUNT", 23)) {
#ifdef SRL_MASK_ARRAYREF_COUNT
return constant_25 (aTHX_ name, iv_return);
break;
case 26:
- if (memEQ(name, "SRL_PROTOCOL_ENCODING_MASK", 26)) {
+ /* Names all of length 26. */
+ /* SRL_PROTOCOL_ENCODING_MASK SRL_PROTOCOL_HDR_USER_DATA */
+ /* Offset 24 gives the best switch position. */
+ switch (name[24]) {
+ case 'S':
+ if (memEQ(name, "SRL_PROTOCOL_ENCODING_MASK", 26)) {
+ /* ^ */
#ifdef SRL_PROTOCOL_ENCODING_MASK
- *iv_return = SRL_PROTOCOL_ENCODING_MASK;
- return PERL_constant_ISIV;
+ *iv_return = SRL_PROTOCOL_ENCODING_MASK;
+ return PERL_constant_ISIV;
#else
- return PERL_constant_NOTDEF;
+ return PERL_constant_NOTDEF;
#endif
+ }
+ break;
+ case 'T':
+ if (memEQ(name, "SRL_PROTOCOL_HDR_USER_DATA", 26)) {
+ /* ^ */
+#ifdef SRL_PROTOCOL_HDR_USER_DATA
+ *iv_return = SRL_PROTOCOL_HDR_USER_DATA;
+ return PERL_constant_ISIV;
+#else
+ return PERL_constant_NOTDEF;
+#endif
+ }
+ break;
}
break;
case 27:
open my $fh, "<", "srl_protocol.h" or die $!;
my (@string_const, @int_const);
while (<$fh>) {
- if (/^#\s*define\s*(SRL_\w+)\s*(.*)$/) {
+ if (/^#\s*define\s*(SRL_\w+)\s*(.*?)(?:\/\*|$)/) {
my ($name, $value) = ($1, $2);
next if $name =~ /_H_$/ or $name =~ /SET/ or $value =~ /"/;
push @int_const, $name;
use Carp qw/croak/;
use XSLoader;
-our $VERSION = '0.37'; # Don't forget to update the TestCompat set for testing against installed decoders!
+our $VERSION = '2.03'; # Don't forget to update the TestCompat set for testing against installed decoders!
# not for public consumption, just for testing.
-my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 23 .. int($VERSION * 100) ) ]; # compat with 0.23 to ...
+(my $num_version = $VERSION) =~ s/_//;
+my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 200 .. int($num_version * 100) ) ]; # compat with 2.00 to ...
sub _test_compat {return(@$TestCompat, $VERSION)}
use Exporter 'import';
-our @EXPORT_OK = qw(encode_sereal);
+our @EXPORT_OK = qw(encode_sereal encode_sereal_with_header_data);
our %EXPORT_TAGS = (all => \@EXPORT_OK);
# export by default if run from command line
our @EXPORT = ((caller())[1] eq '-e' ? @EXPORT_OK : ());
The two are released separately to allow for independent and safer upgrading.
The Sereal protocol version emitted by this encoder implementation is currently
-protocol version 1.
+protocol version 2 by default.
The protocol specification and many other bits of documentation
can be found in the github repository. Right now, the specification is at
Currently, the following options are recognized, none of them are on
by default.
-=head3 no_shared_hashkeys
-
-When the C<no_shared_hashkeys> option is set ot a true value, then
-the encoder will disable the detection and elimination of repeated hash
-keys. This only has an effect for serializing structures containing hashes.
-By skipping the detection of repeated hash keys, performance goes up a bit,
-but the size of the output can potentially be much larger.
-Do not disable this unless you have a reason to.
-
=head3 snappy
If set, the main payload of the Sereal document will be compressed using
The decoder (version 0.04 and up) will know how to handle Snappy-compressed
Sereal documents transparently.
-B<NOTE 1:> Do not use this if you want to parse multiple Sereal packets
-from the same buffer. Instead use C<snappy_incr> instead.
+B<Note:> The C<snappy_incr> and C<snappy> options are identical in
+Sereal protocol V2 (the default). If using the C<use_protocol_v1> option
+to emit Sereal V1 documents, this emits non-incrementally decodable
+documents. See C<snappy_incr> in those cases.
=head3 snappy_incr
-Enables a version of the snappy protocol which is suitable for incremental
-parsing of packets. See also the C<snappy> option above for more details.
+Same as the C<snappy> option for default (Sereal V2) operation.
+
+In Sereal V1, enables a version of the snappy protocol which is suitable for
+incremental parsing of packets. See also the C<snappy> option above for
+more details.
=head3 snappy_threshold
snappy compression is not even attempted even if enabled.
Defaults to one kilobyte (1024 bytes). Set to 0 and C<snappy> to enabled
to always compress.
+Note that the document will not be compressed if the resulting size
+will be bigger than the original size (even if snappy_threshold is 0).
=head3 croak_on_bless
When both flags are set, C<croak_on_bless> has a higher precedence then
C<no_bless_objects>.
+=head3 freeze_callbacks
+
+This option is new in Sereal v2 and needs a Sereal v2 decoder.
+
+If this option is set, the encoder will check for and possibly invoke
+the C<FREEZE> method on any object in the input data. An object that
+was serialized using its C<FREEZE> method will have its corresponding
+C<THAW> class method called during deserialization. The exact semantics
+are documented below under L</"FREEZE/THAW CALLBACK MECHANISM">.
+
+Beware that using this functionality means a significant slowdown for
+object serialization. Even when serializing objects without a C<FREEZE>
+method, the additional method look up will cost a small amount of runtime.
+Yes, C<Sereal::Encoder> is so fast that is may make a difference.
+
=head3 no_bless_objects
If this option is set, then the encoder will serialize blessed references
See L</NON-CANONICAL> for why you might want to use this, and for the
various caveats involved.
+=head3 no_shared_hashkeys
+
+When the C<no_shared_hashkeys> option is set ot a true value, then
+the encoder will disable the detection and elimination of repeated hash
+keys. This only has an effect for serializing structures containing hashes.
+By skipping the detection of repeated hash keys, performance goes up a bit,
+but the size of the output can potentially be much larger.
+
+Do not disable this unless you have a reason to.
+
=head3 dedupe_strings
If this is option is enabled/true then Sereal will use a hash to encode duplicates
longer than 3 characters will be deduped, however this may change in the future.
Note that Sereal will perform certain types of deduping automatically even
-without this option. In particular class names and hash keys are deduped
+without this option. In particular class names and hash keys (see also the
+C<no_shared_hashkeys> setting) are deduped
regardless of this option. Only enable this if you have good reason to
believe that there are many duplicated strings as values in your data
structure.
-Use of this option does not require an upgraded decoder. The deduping
+Use of this option does not require an upgraded decoder (this option was added in
+Sereal::Encoder 0.32). The deduping
is performed in such a way that older decoders should handle it just fine.
In other words, the output of a Sereal B<decoder> should not depend on
whether this option was used during B<encoding>. See also below:
using (decoding) the data may save a lot of memory in some situations
but at the cost of potential action at a distance due to the aliasing.
-Beware: The test suite currently does not cover this option as well as it
+I<Beware:> The test suite currently does not cover this option as well as it
probably should. Patches welcome.
+=head3 use_protocol_v1
+
+If set, the encoder will emit Sereal documents following protocol version 1.
+This is strongly discouraged except for temporary
+compatibility/migration purposes.
+
=head1 INSTANCE METHODS
=head2 encode
of this distribution. Suffice to say that this library is easily competitive
in both time and space efficiency with the best alternatives.
+=head1 FREEZE/THAW CALLBACK MECHANISM
+
+This mechanism is enabled using the C<freeze_callbacks> option of the encoder.
+It is inspired by the equivalent mechanism in L<CBOR::XS> and differs only
+in one minor detail, explained below. The general mechanism is documented
+in the I<A GENERIC OBJECT SERIALIATION PROTOCOL> section of L<Types::Serializer>.
+Similar to CBOR using C<CBOR>, Sereal uses the string C<Sereal> as a serializer
+identifier for the callbacks.
+
+The one difference to the mechanism as supported by CBOR is that in Sereal,
+the C<FREEZE> callback must return a single value. That value can be any
+data structure supported by Sereal (hopefully without causing infinite recursion
+by including the original object). But C<FREEZE> can't return a list as with CBOR.
+This should not be any practical limitation whatsoever. Just return an array
+reference instead of a list.
+
+Here is a contrived example of a class implementing the C<FREEZE> / C<THAW> mechansim.
+
+ package
+ File;
+
+ use Moo;
+
+ has 'path' => (is => 'ro');
+ has 'fh' => (is => 'rw');
+
+ # open file handle if necessary and return it
+ sub get_fh {
+ my $self = shift;
+ # This could also with fancier Moo(se) syntax
+ my $fh = $self->fh;
+ if (not $fh) {
+ open $fh, "<", $self->path or die $!;
+ $self->fh($fh);
+ }
+ return $fh;
+ }
+
+ sub FREEZE {
+ my ($self, $serializer) = @_;
+ # Could switch on $serializer here: JSON, CBOR, Sereal, ...
+ # But this case is so simple that it will work with ALL of them.
+ # Do not try to serialize our file handle! Path will be enough
+ # to recreate.
+ return $self->path;
+ }
+
+ sub THAW {
+ my ($class, $serializer, $data) = @_;
+ # Turn back into object.
+ return $class->new(path => $data);
+ }
+
+Why is the C<FREEZE>/C<THAW> mechanism important here? Our contrived C<File>
+class may contain a file handle which can't be serialized. So C<FREEZE> not
+only returns just the path (which is more compact than encoding the actual
+object contents), but it strips the file handle which can be lazily reopened
+on the other side of the serialization/deserialization pipe.
+But this example also shows that a naive implementation can easily end up
+with subtle bugs. A file handle itself has state (position in file, etc).
+Thus the deserialization in the above example won't accurately reproduce
+the original state. It can't, of course, if it's deserialized in a different
+environment anyway.
+
=head1 THREAD-SAFETY
C<Sereal::Encoder> is thread-safe on Perl's 5.8.7 and higher. This means
occasional false-negative, but think carefully before applying Sereal in other
use-cases.
-=head1 AUTHOR
+=head1 BUGS, CONTACT AND SUPPORT
+
+For reporting bugs, please use the github bug tracker at
+L<http://github.com/Sereal/Sereal/issues>.
+
+For support and discussion of Sereal, there are two Google Groups:
+
+Announcements around Sereal (extremely low volume):
+L<https://groups.google.com/forum/?fromgroups#!forum/sereal-announce>
+
+Sereal development list:
+L<https://groups.google.com/forum/?fromgroups#!forum/sereal-dev>
+
+=head1 AUTHORS
Yves Orton E<lt>demerphq@gmail.comE<gt>
=head1 COPYRIGHT AND LICENSE
-Copyright (C) 2012, 2013 by Steffen Mueller
-Copyright (C) 2012, 2013 by Yves Orton
+Copyright (C) 2012, 2013, 2014 by Steffen Mueller
+Copyright (C) 2012, 2013, 2014 by Yves Orton
The license for the code in this distribution is the following,
with the exceptions listed below:
SRL_PROTOCOL_ENCODING_RAW
SRL_PROTOCOL_ENCODING_SNAPPY
SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL
+ SRL_PROTOCOL_HDR_USER_DATA
+ SRL_PROTOCOL_HDR_CONTINUE
SRL_POS_MAX_SIZE
SRL_NEG_MIN_SIZE
SRL_HDR_POS
SRL_HDR_COPY
SRL_HDR_WEAKEN
SRL_HDR_REGEXP
+ SRL_HDR_OBJECT_FREEZE
+ SRL_HDR_OBJECTV_FREEZE
SRL_HDR_RESERVED
SRL_HDR_RESERVED_LOW
SRL_HDR_RESERVED_HIGH
#define PTABLE_HASH(ptr) ptr_hash(PTR2nat(ptr))
+#define PTABLE_FLAG_AUTOCLEAN 1
+
+typedef struct PTABLE_entry PTABLE_ENTRY_t;
+typedef struct PTABLE PTABLE_t;
+typedef struct PTABLE_iter PTABLE_ITER_t;
+
struct PTABLE_entry {
struct PTABLE_entry *next;
void *key;
struct PTABLE_entry **tbl_ary;
UV tbl_max;
UV tbl_items;
+ PTABLE_ITER_t *cur_iter; /* one iterator at a time can be auto-freed */
};
struct PTABLE_iter {
struct PTABLE_entry *cur_entry;
};
-typedef struct PTABLE_entry PTABLE_ENTRY_t;
-typedef struct PTABLE PTABLE_t;
-typedef struct PTABLE_iter PTABLE_ITER_t;
-
STATIC PTABLE_t * PTABLE_new(void);
STATIC PTABLE_t * PTABLE_new_size(const U8 size_base2_exponent);
STATIC void PTABLE_delete(PTABLE_t *tbl, void *key);
STATIC void PTABLE_grow(PTABLE_t *tbl);
STATIC void PTABLE_clear(PTABLE_t *tbl);
+STATIC void PTABLE_clear_dec(pTHX_ PTABLE_t *tbl);
STATIC void PTABLE_free(PTABLE_t *tbl);
STATIC PTABLE_ITER_t * PTABLE_iter_new(PTABLE_t *tbl);
+STATIC PTABLE_ITER_t * PTABLE_iter_new_flags(PTABLE_t *tbl, int flags);
STATIC PTABLE_ENTRY_t * PTABLE_iter_next(PTABLE_ITER_t *iter);
STATIC void PTABLE_iter_free(PTABLE_ITER_t *iter);
Newxz(tbl, 1, PTABLE_t);
tbl->tbl_max = (1 << size_base2_exponent) - 1;
tbl->tbl_items = 0;
+ tbl->cur_iter = NULL;
Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTABLE_ENTRY_t*);
return tbl;
}
}
}
+STATIC void
+PTABLE_clear_dec(pTHX_ PTABLE_t *tbl)
+{
+ if (tbl && tbl->tbl_items) {
+ register PTABLE_ENTRY_t * * const array = tbl->tbl_ary;
+ UV riter = tbl->tbl_max;
+
+ do {
+ PTABLE_ENTRY_t *entry = array[riter];
+
+ while (entry) {
+ PTABLE_ENTRY_t * const oentry = entry;
+ entry = entry->next;
+ if (oentry->value)
+ SvREFCNT_dec((SV*)(oentry->value));
+ Safefree(oentry);
+ }
+
+ /* chocolateboy 2008-01-08
+ *
+ * make sure we clear the array entry, so that subsequent probes fail
+ */
+
+ array[riter] = NULL;
+ } while (riter--);
+
+ tbl->tbl_items = 0;
+ }
+}
+
/* remove one entry from a ptr table */
STATIC void
STATIC void
PTABLE_free(PTABLE_t *tbl)
{
- if (!tbl) {
+ if (!tbl)
return;
- }
+
PTABLE_clear(tbl);
+ if (tbl->cur_iter) {
+ PTABLE_ITER_t *it = tbl->cur_iter;
+ tbl->cur_iter = NULL; /* avoid circular checks */
+ PTABLE_iter_free(it);
+ }
Safefree(tbl->tbl_ary);
Safefree(tbl);
}
/* Create new iterator object */
STATIC PTABLE_ITER_t *
PTABLE_iter_new(PTABLE_t *tbl)
+{
+ return PTABLE_iter_new_flags(tbl, 0);
+}
+
+STATIC PTABLE_ITER_t *
+PTABLE_iter_new_flags(PTABLE_t *tbl, int flags)
{
PTABLE_ITER_t *iter;
Newx(iter, 1, PTABLE_ITER_t);
iter->table = tbl;
iter->bucket_num = 0;
iter->cur_entry = NULL;
+
+ if (flags & PTABLE_FLAG_AUTOCLEAN)
+ tbl->cur_iter = iter;
if (tbl->tbl_items == 0) {
/* Prevent hash bucket scanning.
* This can be a significant optimization on large, empty hashes. */
STATIC void
PTABLE_iter_free(PTABLE_ITER_t *iter)
{
+ /* If we're the iterator that can be auto-cleaned by the PTABLE,
+ * then unregister. */
+ if (iter->table->cur_iter == iter)
+ iter->table->cur_iter = NULL;
+
Safefree(iter);
}
+STATIC void
+PTABLE_debug_dump(PTABLE_t *tbl, void (*func)(PTABLE_ENTRY_t *e))
+{
+ PTABLE_ENTRY_t *e;
+ PTABLE_ITER_t *iter = PTABLE_iter_new(tbl);
+ while (NULL != (e = PTABLE_iter_next(iter))) {
+ func(e);
+ }
+ PTABLE_iter_free(iter);
+}
#endif
if ((len < 12) && (offset < 2048)) {
int len_minus_4 = len - 4;
DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */
- *op++ = COPY_1_BYTE_OFFSET |
- ((len_minus_4) << 2) |
+ *op++ = COPY_1_BYTE_OFFSET +
+ ((len_minus_4) << 2) +
((offset >> 8) << 5);
*op++ = offset & 0xff;
} else {
- *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
+ *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
put_unaligned_le16(offset, op);
op += 2;
}
#define __LITTLE_ENDIAN LITTLE_ENDIAN
#define __BIG_ENDIAN BIG_ENDIAN
-#elif defined(__FreeBSD__) || defined(__DragonFlyBSD__) || defined(__NetBSD__)
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
#include <sys/endian.h>
#define bswap_16(x) bswap16(x)
#include "assert.h"
#include "srl_inline.h"
+#include "srl_common.h"
#include "srl_encoder.h"
+#include "srl_buffer_types.h"
+
#ifdef MEMDEBUG
# define BUFFER_GROWTH_FACTOR 1
#else
* usable in one place per compilation unit. Drop "static" when necessary.
* For now, potentially smaller code wins. */
-
/* buffer operations */
-#define BUF_POS_OFS(enc) ((enc)->pos - (enc)->buf_start)
-#define BUF_SPACE(enc) ((enc)->buf_end - (enc)->pos)
-#define BUF_SIZE(enc) ((enc)->buf_end - (enc)->buf_start)
-#define BUF_NEED_GROW(enc, minlen) ((size_t)BUF_SPACE(enc) <= minlen)
-#define BUF_NEED_GROW_TOTAL(enc, minlen) ((size_t)BUF_SIZE(enc) <= minlen)
+#define BUF_POS_OFS(buf) (((buf).pos) - ((buf).start))
+#define BUF_SPACE(buf) (((buf).end) - ((buf).pos))
+#define BUF_SIZE(buf) (((buf).end) - ((buf).start))
+#define BUF_NEED_GROW(buf, minlen) ((size_t)BUF_SPACE(buf) <= minlen)
+#define BUF_NEED_GROW_TOTAL(buf, minlen) ((size_t)BUF_SIZE(buf) <= minlen)
+
+
+/* body-position/size related operations */
+#define BODY_POS_OFS(buf) (((buf).pos) - ((buf).body_pos))
+
+/* these are mostly for right between (de)serializing the header and the body */
+#define SRL_SET_BODY_POS(enc, pos_ptr) ((enc)->buf.body_pos = pos_ptr)
+#define SRL_UPDATE_BODY_POS(enc) \
+ STMT_START { \
+ if (expect_false(SRL_ENC_HAVE_OPTION((enc), SRL_F_USE_PROTO_V1))) { \
+ SRL_SET_BODY_POS(enc, (enc)->buf.start); \
+ } else { \
+ SRL_SET_BODY_POS(enc, (enc)->buf.pos-1); \
+ } \
+ } STMT_END
+
/* Internal debugging macros, used only in DEBUG mode */
#ifndef NDEBUG
#define DEBUG_ASSERT_BUF_SPACE(enc, len) STMT_START { \
- if((BUF_SPACE(enc) < (ptrdiff_t)(len))) { \
+ if((BUF_SPACE(enc->buf) < (ptrdiff_t)(len))) { \
warn("failed assertion check - pos: %ld [%p %p %p] %ld < %ld", \
- (long)BUF_POS_OFS(enc), (enc)->buf_start, (enc)->pos, (enc)->buf_end, (long)BUF_SPACE(enc),(long)(len)); \
+ (long)BUF_POS_OFS(enc->buf), (enc)->buf.start, (enc)->buf.pos, (enc)->buf.end, (long)BUF_SPACE(enc->buf),(long)(len)); \
} \
- assert(BUF_SPACE(enc) >= (ptrdiff_t)(len)); \
+ assert(BUF_SPACE(enc->buf) >= (ptrdiff_t)(len)); \
} STMT_END
#else
#define DEBUG_ASSERT_BUF_SPACE(enc, len) ((void)0)
#ifndef NDEBUG
#define DEBUG_ASSERT_BUF_SANE(enc) STMT_START { \
- if(!(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end))){\
+ if(!(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end))){\
warn("failed sanity assertion check - pos: %ld [%p %p %p] %ld", \
- (long)BUF_POS_OFS(enc), (enc)->buf_start, (enc)->pos, (enc)->buf_end, (long)BUF_SPACE(enc)); \
+ (long)BUF_POS_OFS(enc->buf), (enc)->buf.start, (enc)->buf.pos, (enc)->buf.end, (long)BUF_SPACE(enc->buf)); \
} \
- assert(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end));\
+ assert(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end));\
} STMT_END
#else
-#define DEBUG_ASSERT_BUF_SANE(enc) assert(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end))
+#define DEBUG_ASSERT_BUF_SANE(enc) assert(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end))
#endif
+/* Allocate a virgin buffer (but not the buffer struct) */
+SRL_STATIC_INLINE int
+srl_buf_init_buffer(pTHX_ srl_buffer_t *buf, const STRLEN init_size)
+{
+ Newx(buf->start, init_size, char);
+ if (expect_false( buf->start == NULL ))
+ return 1;
+ buf->end = buf->start + init_size - 1;
+ buf->pos = buf->start;
+ buf->body_pos = buf->start; /* SRL_SET_BODY_POS(enc, enc->buf.start) equiv */
+ return 0;
+}
+
+/* Free a buffer (but not the buffer struct) */
+SRL_STATIC_INLINE void
+srl_buf_free_buffer(pTHX_ srl_buffer_t *buf)
+{
+ Safefree(buf->start);
+}
+
+/* Copy one buffer to another (shallowly!) */
+SRL_STATIC_INLINE void
+srl_buf_copy_buffer(pTHX_ srl_buffer_t *src, srl_buffer_t *dest)
+{
+ Copy(src, dest, 1, srl_buffer_t);
+}
+
+/* Swap two buffers */
+SRL_STATIC_INLINE void
+srl_buf_swap_buffer(pTHX_ srl_buffer_t *buf1, srl_buffer_t *buf2)
+{
+ srl_buffer_t tmp;
+ Copy(buf1, &tmp, 1, srl_buffer_t);
+ Copy(buf2, buf1, 1, srl_buffer_t);
+ Copy(&tmp, buf2, 1, srl_buffer_t);
+}
+
+
SRL_STATIC_INLINE void
srl_buf_grow_nocheck(pTHX_ srl_encoder_t *enc, size_t minlen)
{
- const size_t pos_ofs= BUF_POS_OFS(enc); /* have to store the offset of pos */
+ const size_t pos_ofs= BUF_POS_OFS(enc->buf); /* have to store the offset of pos */
+ const size_t body_ofs= enc->buf.body_pos - enc->buf.start; /* have to store the offset of the body */
#ifdef MEMDEBUG
const size_t new_size = minlen;
#else
- const size_t cur_size = BUF_SIZE(enc);
+ const size_t cur_size = BUF_SIZE(enc->buf);
const size_t grown_len = (size_t)(cur_size * BUFFER_GROWTH_FACTOR);
const size_t new_size = 100 + (minlen > grown_len ? minlen : grown_len);
#endif
+
DEBUG_ASSERT_BUF_SANE(enc);
/* assert that Renew means GROWING the buffer */
- assert(enc->buf_start + new_size > enc->buf_end);
- Renew(enc->buf_start, new_size, char);
- if (enc->buf_start == NULL)
+ assert(enc->buf.start + new_size > enc->buf.end);
+
+ Renew(enc->buf.start, new_size, char);
+ if (enc->buf.start == NULL)
croak("Out of memory!");
- enc->buf_end = (char *)(enc->buf_start + new_size);
- enc->pos= enc->buf_start + pos_ofs;
- assert(enc->buf_end - enc->buf_start > (ptrdiff_t)0);
+ enc->buf.end = (char *)(enc->buf.start + new_size);
+ enc->buf.pos= enc->buf.start + pos_ofs;
+ SRL_SET_BODY_POS(enc, enc->buf.start + body_ofs);
+
+ DEBUG_ASSERT_BUF_SANE(enc);
+ assert(enc->buf.end - enc->buf.start > (ptrdiff_t)0);
+ assert(enc->buf.pos - enc->buf.start >= (ptrdiff_t)0);
+ /* The following is checking against -1 because SRL_UPDATE_BODY_POS
+ * will actually set the body_pos to pos-1, where pos can be 0.
+ * This works out fine in the end, but is admittedly a bit shady.
+ * FIXME */
+ assert(enc->buf.body_pos - enc->buf.start >= (ptrdiff_t)-1);
}
-#define BUF_SIZE_ASSERT(enc, minlen) \
- STMT_START { \
- DEBUG_ASSERT_BUF_SANE(enc); \
- if (BUF_NEED_GROW(enc, minlen)) \
- srl_buf_grow_nocheck(aTHX_ (enc), (BUF_SIZE(enc) + minlen)); \
- DEBUG_ASSERT_BUF_SANE(enc); \
+#define BUF_SIZE_ASSERT(enc, minlen) \
+ STMT_START { \
+ DEBUG_ASSERT_BUF_SANE(enc); \
+ if (BUF_NEED_GROW(enc->buf, minlen)) \
+ srl_buf_grow_nocheck(aTHX_ (enc), (BUF_SIZE(enc->buf) + minlen)); \
+ DEBUG_ASSERT_BUF_SANE(enc); \
} STMT_END
-#define BUF_SIZE_ASSERT_TOTAL(enc, minlen) \
- STMT_START { \
- DEBUG_ASSERT_BUF_SANE(enc); \
- if (BUF_NEED_GROW_TOTAL(enc, minlen)) \
- srl_buf_grow_nocheck(aTHX_ (enc), (minlen)); \
- DEBUG_ASSERT_BUF_SANE(enc); \
+#define BUF_SIZE_ASSERT_TOTAL(enc, minlen) \
+ STMT_START { \
+ DEBUG_ASSERT_BUF_SANE(enc); \
+ if (BUF_NEED_GROW_TOTAL(enc->buf, minlen)) \
+ srl_buf_grow_nocheck(aTHX_ (enc), (minlen)); \
+ DEBUG_ASSERT_BUF_SANE(enc); \
} STMT_END
SRL_STATIC_INLINE void
srl_buf_cat_str_int(pTHX_ srl_encoder_t *enc, const char *str, size_t len)
{
BUF_SIZE_ASSERT(enc, len);
- Copy(str, enc->pos, len, char);
- enc->pos += len;
+ Copy(str, enc->buf.pos, len, char);
+ enc->buf.pos += len;
DEBUG_ASSERT_BUF_SANE(enc);
}
#define srl_buf_cat_str(enc, str, len) srl_buf_cat_str_int(aTHX_ enc, str, len)
{
DEBUG_ASSERT_BUF_SANE(enc);
DEBUG_ASSERT_BUF_SPACE(enc, len);
- Copy(str, enc->pos, len, char);
- enc->pos += len;
+ Copy(str, enc->buf.pos, len, char);
+ enc->buf.pos += len;
DEBUG_ASSERT_BUF_SANE(enc);
}
#define srl_buf_cat_str_nocheck(enc, str, len) srl_buf_cat_str_nocheck_int(aTHX_ enc, str, len)
DEBUG_ASSERT_BUF_SANE(enc);
BUF_SIZE_ASSERT(enc, 1);
DEBUG_ASSERT_BUF_SPACE(enc, 1);
- *enc->pos++ = c;
+ *enc->buf.pos++ = c;
DEBUG_ASSERT_BUF_SANE(enc);
}
#define srl_buf_cat_char(enc, c) srl_buf_cat_char_int(aTHX_ enc, c)
{
DEBUG_ASSERT_BUF_SANE(enc);
DEBUG_ASSERT_BUF_SPACE(enc, 1);
- *enc->pos++ = c;
+ *enc->buf.pos++ = c;
DEBUG_ASSERT_BUF_SANE(enc);
}
#define srl_buf_cat_char_nocheck(enc, c) srl_buf_cat_char_nocheck_int(aTHX_ enc, c)
srl_buf_cat_varint_nocheck(pTHX_ srl_encoder_t *enc, const char tag, UV n) {
DEBUG_ASSERT_BUF_SANE(enc);
DEBUG_ASSERT_BUF_SPACE(enc, (tag==0 ? 0 : 1) + SRL_MAX_VARINT_LENGTH);
- if (tag)
- *enc->pos++ = tag;
- while (n >= 0x80) { /* while we are larger than 7 bits long */
- *enc->pos++ = (n & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */
- n = n >> 7; /* shift off the 7 least significant bits */
+ if (expect_true( tag ))
+ *enc->buf.pos++ = tag;
+ while (n >= 0x80) { /* while we are larger than 7 bits long */
+ *enc->buf.pos++ = (n & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */
+ n = n >> 7; /* shift off the 7 least significant bits */
}
- *enc->pos++ = n; /* encode the last 7 bits without the high bit being set */
+ *enc->buf.pos++ = n; /* encode the last 7 bits without the high bit being set */
DEBUG_ASSERT_BUF_SANE(enc);
}
--- /dev/null
+#ifndef SRL_BUFFER_TYPES_H_
+#define SRL_BUFFER_TYPES_H_
+
+typedef struct {
+ char *start; /* ptr to "physical" start of output buffer */
+ char *end; /* ptr to end of output buffer */
+ char *pos; /* ptr to current position within output buffer */
+ char *body_pos; /* ptr to start of body within output buffer for protocol V2 encoding */
+} srl_buffer_t;
+
+#endif
#define DEBUGHACK 0
/* some static function declarations */
+SRL_STATIC_INLINE void srl_clear_seen_hashes(pTHX_ srl_encoder_t *enc);
static void srl_dump_sv(pTHX_ srl_encoder_t *enc, SV *src);
SRL_STATIC_INLINE void srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src);
SRL_STATIC_INLINE void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8);
SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys);
SRL_STATIC_INLINE void srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src);
SRL_STATIC_INLINE void srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src);
-SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *src);
+SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement);
+SRL_STATIC_INLINE SV *srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent);
SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc);
SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc);
+SRL_STATIC_INLINE PTABLE_t *srl_init_freezeobj_svhash(srl_encoder_t *enc);
SRL_STATIC_INLINE PTABLE_t *srl_init_weak_hash(srl_encoder_t *enc);
SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc);
? srl_init_weak_hash(enc) \
: (enc)->weak_seenhash )
+#define SRL_GET_FREEZEOBJ_SVHASH(enc) ( (enc)->freezeobj_svhash == NULL \
+ ? srl_init_freezeobj_svhash(enc) \
+ : (enc)->freezeobj_svhash )
+
#define CALL_SRL_DUMP_SV(enc, src) STMT_START { \
if (!(src)) { \
srl_buf_cat_char((enc), SRL_HDR_UNDEF); \
} \
} STMT_END
-
-
/* This is fired when we exit the Perl pseudo-block.
* It frees our encoder and all. Put encoder-level cleanup
* logic here so that we can simply use croak/longjmp for
}
}
-void
-srl_clear_encoder(pTHX_ srl_encoder_t *enc)
+SRL_STATIC_INLINE void
+srl_clear_seen_hashes(pTHX_ srl_encoder_t *enc)
{
- if (!SRL_ENC_HAVE_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY)) {
- warn("Sereal Encoder being cleared but in virgin state. That is unexpected.");
- }
-
- enc->recursion_depth = 0;
if (enc->ref_seenhash != NULL)
PTABLE_clear(enc->ref_seenhash);
+ if (enc->freezeobj_svhash != NULL)
+ PTABLE_clear_dec(aTHX_ enc->freezeobj_svhash);
if (enc->str_seenhash != NULL)
PTABLE_clear(enc->str_seenhash);
if (enc->weak_seenhash != NULL)
PTABLE_clear(enc->weak_seenhash);
if (enc->string_deduper_hv != NULL)
hv_clear(enc->string_deduper_hv);
- enc->pos = enc->buf_start;
+}
+
+void
+srl_clear_encoder(pTHX_ srl_encoder_t *enc)
+{
+ if (!SRL_ENC_HAVE_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY)) {
+ warn("Sereal Encoder being cleared but in virgin state. That is unexpected.");
+ }
+
+ enc->recursion_depth = 0;
+ srl_clear_seen_hashes(aTHX_ enc);
+
+ enc->buf.pos = enc->buf.start;
+ /* tmp_buf.start may be NULL for an unused tmp_buf, but so what? */
+ enc->tmp_buf.pos = enc->tmp_buf.start;
+
+ SRL_SET_BODY_POS(enc, enc->buf.start);
SRL_ENC_RESET_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY);
}
void
srl_destroy_encoder(pTHX_ srl_encoder_t *enc)
{
- Safefree(enc->buf_start);
+ srl_buf_free_buffer(aTHX_ &enc->buf);
+
+ /* Free tmp buffer only if it was allocated at all. */
+ if (enc->tmp_buf.start != NULL)
+ srl_buf_free_buffer(aTHX_ &enc->tmp_buf);
+
Safefree(enc->snappy_workmem);
if (enc->ref_seenhash != NULL)
PTABLE_free(enc->ref_seenhash);
+ if (enc->freezeobj_svhash != NULL)
+ PTABLE_free(enc->freezeobj_svhash);
if (enc->str_seenhash != NULL)
PTABLE_free(enc->str_seenhash);
if (enc->weak_seenhash != NULL)
PTABLE_free(enc->weak_seenhash);
if (enc->string_deduper_hv != NULL)
SvREFCNT_dec(enc->string_deduper_hv);
+
+ SvREFCNT_dec(enc->sereal_string_sv);
+
Safefree(enc);
}
if (enc == NULL)
croak("Out of memory");
- /* Init struct */
- Newx(enc->buf_start, INITIALIZATION_SIZE, char);
- if (enc->buf_start == NULL) {
+ /* Init buffer struct */
+ if (expect_false( srl_buf_init_buffer(aTHX_ &(enc->buf), INITIALIZATION_SIZE) != 0 )) {
Safefree(enc);
croak("Out of memory");
}
- enc->buf_end = enc->buf_start + INITIALIZATION_SIZE - 1;
- enc->pos = enc->buf_start;
+
+ /* Set the tmp buffer struct's char buffer to NULL so we don't free
+ * something nasty if it's unused. */
+ enc->tmp_buf.start = NULL;
+
enc->recursion_depth = 0;
enc->max_recursion_depth = DEFAULT_MAX_RECUR_DEPTH;
enc->operational_flags = 0;
enc->weak_seenhash = NULL;
enc->str_seenhash = NULL;
enc->ref_seenhash = NULL;
+ enc->freezeobj_svhash = NULL;
enc->snappy_workmem = NULL;
enc->string_deduper_hv = NULL;
+ enc->sereal_string_sv = NULL;
return enc;
}
/* load options */
if (opt != NULL) {
int undef_unknown = 0;
- int snappy = 0;
+ int snappy_nonincr = 0;
/* SRL_F_SHARED_HASHKEYS on by default */
svp = hv_fetchs(opt, "no_shared_hashkeys", 0);
if ( !svp || !SvTRUE(*svp) )
SRL_ENC_SET_OPTION(enc, SRL_F_SHARED_HASHKEYS);
+ /* Needs to be before the snappy options */
+ svp = hv_fetchs(opt, "use_protocol_v1", 0);
+ if ( svp && SvTRUE(*svp) )
+ SRL_ENC_SET_OPTION(enc, SRL_F_USE_PROTO_V1);
+
svp = hv_fetchs(opt, "croak_on_bless", 0);
if ( svp && SvTRUE(*svp) )
SRL_ENC_SET_OPTION(enc, SRL_F_CROAK_ON_BLESS);
if ( svp && SvTRUE(*svp) )
SRL_ENC_SET_OPTION(enc, SRL_F_NO_BLESS_OBJECTS);
+ svp = hv_fetchs(opt, "freeze_callbacks", 0);
+ if ( svp && SvTRUE(*svp) ) {
+ if (SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS))
+ croak("The no_bless_objects and freeze_callback_support "
+ "options are mutually exclusive");
+ SRL_ENC_SET_OPTION(enc, SRL_F_ENABLE_FREEZE_SUPPORT);
+ enc->sereal_string_sv = newSVpvs("Sereal");
+ }
+
svp = hv_fetchs(opt, "snappy", 0);
if ( svp && SvTRUE(*svp) ) {
- snappy = 1;
- SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY);
+ /* incremental is the new black in V2 */
+ if (expect_true( !SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) ))
+ SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL);
+ else {
+ snappy_nonincr = 1;
+ SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY);
+ }
}
svp = hv_fetchs(opt, "snappy_incr", 0);
if ( svp && SvTRUE(*svp) ) {
- if (snappy)
+ if (snappy_nonincr)
croak("'snappy' and 'snappy_incr' options are mutually exclusive");
SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL);
}
return enc->weak_seenhash;
}
+SRL_STATIC_INLINE PTABLE_t *
+srl_init_freezeobj_svhash(srl_encoder_t *enc)
+{
+ enc->freezeobj_svhash = PTABLE_new_size(3);
+ return enc->freezeobj_svhash;
+}
+
SRL_STATIC_INLINE HV *
srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc)
{
void
-srl_write_header(pTHX_ srl_encoder_t *enc)
+srl_write_header(pTHX_ srl_encoder_t *enc, SV *user_header_src)
{
/* 4th to 8th bit are flags. Using 4th for snappy flag. FIXME needs to go in spec. */
- const U8 version_and_flags = SRL_PROTOCOL_VERSION
+ const U8 version_and_flags = (SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) ? 1 : SRL_PROTOCOL_VERSION)
| (
SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY)
? SRL_PROTOCOL_ENCODING_SNAPPY
BUF_SIZE_ASSERT(enc, sizeof(SRL_MAGIC_STRING) + 1 + 1);
srl_buf_cat_str_s_nocheck(enc, SRL_MAGIC_STRING);
srl_buf_cat_char_nocheck(enc, version_and_flags);
- srl_buf_cat_char_nocheck(enc, '\0'); /* variable header length (0 right now) */
+ if (user_header_src == NULL) {
+ srl_buf_cat_char_nocheck(enc, '\0'); /* variable header length (0 right now) */
+ }
+ else {
+ STRLEN user_data_len;
+
+ if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) ))
+ croak("Cannot serialize user header data in Sereal protocol V1 mode!");
+
+ /* Allocate tmp buffer for swapping if necessary,
+ * will be cleaned up automatically */
+ if (enc->tmp_buf.start == NULL)
+ srl_buf_init_buffer(aTHX_ &enc->tmp_buf, INITIALIZATION_SIZE);
+
+ /* Write document body (for header) into separate buffer */
+ srl_buf_swap_buffer(aTHX_ &enc->tmp_buf, &enc->buf);
+ SRL_UPDATE_BODY_POS(enc);
+ srl_dump_sv(aTHX_ enc, user_header_src);
+ srl_fixup_weakrefs(aTHX_ enc); /* more bodies to follow */
+ srl_clear_seen_hashes(aTHX_ enc); /* more bodies to follow */
+
+ /* Swap main buffer back in, encode header length&bitfield, copy user header data */
+ user_data_len = BUF_POS_OFS(enc->buf);
+ srl_buf_swap_buffer(aTHX_ &enc->buf, &enc->tmp_buf);
+
+ BUF_SIZE_ASSERT(enc, user_data_len + 1 + SRL_MAX_VARINT_LENGTH); /* +1 for bit field, +X for header len */
+
+ /* Encode header length */
+ srl_buf_cat_varint_nocheck(aTHX_ enc, 0, (UV)(user_data_len + 1)); /* +1 for bit field */
+ /* Encode bitfield */
+ srl_buf_cat_char_nocheck(enc, '\1');
+ /* Copy user header data */
+ Copy(enc->tmp_buf.start, enc->buf.pos, user_data_len, char);
+ enc->buf.pos += user_data_len;
+
+ enc->tmp_buf.pos = enc->tmp_buf.start; /* reset tmp buffer just to be clean */
+ }
}
/* The following is to handle the fact that under normal build options
if ( f == nv || nv != nv ) {
BUF_SIZE_ASSERT(enc, 1 + sizeof(f)); /* heuristic: header + string + simple value */
srl_buf_cat_char_nocheck(enc,SRL_HDR_FLOAT);
- Copy((char *)&f, enc->pos, sizeof(f), char);
- enc->pos += sizeof(f);
+ Copy((char *)&f, enc->buf.pos, sizeof(f), char);
+ enc->buf.pos += sizeof(f);
} else if (d == nv) {
BUF_SIZE_ASSERT(enc, 1 + sizeof(d)); /* heuristic: header + string + simple value */
srl_buf_cat_char_nocheck(enc,SRL_HDR_DOUBLE);
- Copy((char *)&d, enc->pos, sizeof(d), char);
- enc->pos += sizeof(d);
+ Copy((char *)&d, enc->buf.pos, sizeof(d), char);
+ enc->buf.pos += sizeof(d);
} else {
BUF_SIZE_ASSERT(enc, 1 + sizeof(nv)); /* heuristic: header + string + simple value */
srl_buf_cat_char_nocheck(enc,SRL_HDR_LONG_DOUBLE);
- Copy((char *)&nv, enc->pos, sizeof(nv), char);
- enc->pos += sizeof(nv);
+ Copy((char *)&nv, enc->buf.pos, sizeof(nv), char);
+ enc->buf.pos += sizeof(nv);
}
}
* we're just wasting some space. */
/* TODO optimize! */
- if (SvIOK_UV(src) || SvIV(src) >= 0) { /* FIXME find a way to express this without repeated SvIV/SvUV */
+ /* FIXME find a way to express the condition without repeated SvIV/SvUV */
+ if (expect_true( SvIOK_UV(src) || SvIV(src) >= 0 )) {
const UV num = SvUV(src); /* FIXME is SvUV_nomg good enough because of the GET magic in dump_sv? SvUVX after having checked the flags? */
if (num < 16) {
/* encodable as POS */
}
}
+/* Dumps the tag and class name of an object doing all necessary callbacks or
+ * exception-throwing.
+ * The provided SV must already have been identified as a Perl object
+ * using sv_isobject().
+ * If the return value is not NULL, then it's the actual object content that
+ * needs to be serialized by the caller. */
+SRL_STATIC_INLINE SV *
+srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent)
+{
+ assert(sv_isobject(src)); /* duplicate asserts are "free" */
+
+ /* Check for FREEZE support */
+ if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_ENABLE_FREEZE_SUPPORT) )) {
+ HV *stash = SvSTASH(referent);
+ GV *method = NULL;
+ assert(stash != NULL);
+ method = gv_fetchmethod_autoload(stash, "FREEZE", 0);
+
+ if (expect_false( method != NULL )) {
+ SV *replacement= NULL;
+ PTABLE_t *freezeobj_svhash = SRL_GET_FREEZEOBJ_SVHASH(enc);
+ if (SvREFCNT(referent)>1) {
+ replacement= PTABLE_fetch(freezeobj_svhash, referent);
+ }
+ if (!replacement) {
+ int count;
+ dSP;
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ EXTEND(SP, 2);
+ PUSHs(src);
+ PUSHs(enc->sereal_string_sv); /* not NULL if SRL_F_ENABLE_FREEZE_SUPPORT is set */
+ replacement= (SV*)newAV();
+ PTABLE_store(freezeobj_svhash, referent, replacement);
+
+ PUTBACK;
+ count = call_sv((SV *)GvCV(method), G_ARRAY);
+ /* TODO explore method lookup caching */
+ SPAGAIN;
+
+ while ( count-- > 0) {
+ SV *tmp = POPs;
+ SvREFCNT_inc(tmp);
+ if (!av_store((AV*)replacement,count,tmp))
+ croak("Failed to push value into array");
+ }
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ }
+ return replacement;
+ }
+ }
+ return NULL;
+
+}
/* Outputs a bless header and the class name (as some form of string or COPY).
* Caller then has to output the actual reference payload. */
SRL_STATIC_INLINE void
-srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *src)
+srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement)
{
- const HV *stash = SvSTASH(src);
- PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
- const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
-
- if (oldoffset != 0) {
- /* Issue COPY instead of literal class name string */
- srl_buf_cat_varint(aTHX_ enc, SRL_HDR_OBJECTV, (UV)oldoffset);
- }
- else {
- const char *class_name = HvNAME_get(stash);
- const size_t len = HvNAMELEN_get(stash);
-
- /* First save this new string (well, the HV * that it is represented by) into the string
- * dedupe table.
- * By saving the ptr to the HV, we only dedupe class names with class names, though
- * this seems a small price to pay for not having to keep a full string table.
- * At least, we can safely use the same PTABLE to store the ptrs to hashkeys since
- * the set of pointers will never collide.
- * /me bows to Yves for the delightfully evil hack. */
- srl_buf_cat_char(enc, SRL_HDR_OBJECT);
-
- /* remember current offset before advancing it */
- PTABLE_store(string_seenhash, (void *)stash, (void *)(enc->pos - enc->buf_start));
-
- /* HvNAMEUTF8 not in older perls and it would be 0 for those anyway */
+ /* Check that we actually want to support objects */
+ if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_CROAK_ON_BLESS)) ) {
+ croak("Attempted to serialize blessed reference. Serializing objects "
+ "using Sereal::Encoder was explicitly disabled using the "
+ "'croak_on_bless' option.");
+ } else if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS) )) {
+ return;
+ } else {
+ const HV *stash = SvSTASH(referent);
+ PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
+ const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
+
+ if (oldoffset != 0) {
+ /* Issue COPY instead of literal class name string */
+ srl_buf_cat_varint(aTHX_ enc,
+ expect_false(replacement) ? SRL_HDR_OBJECTV_FREEZE : SRL_HDR_OBJECTV,
+ (UV)oldoffset);
+ }
+ else {
+ const char *class_name = HvNAME_get(stash);
+ const size_t len = HvNAMELEN_get(stash);
+
+ /* First save this new string (well, the HV * that it is represented by) into the string
+ * dedupe table.
+ * By saving the ptr to the HV, we only dedupe class names with class names, though
+ * this seems a small price to pay for not having to keep a full string table.
+ * At least, we can safely use the same PTABLE to store the ptrs to hashkeys since
+ * the set of pointers will never collide.
+ * /me bows to Yves for the delightfully evil hack. */
+ srl_buf_cat_char(enc, expect_false(replacement) ? SRL_HDR_OBJECT_FREEZE : SRL_HDR_OBJECT);
+
+ /* remember current offset before advancing it */
+ PTABLE_store(string_seenhash, (void *)stash, (void *)BODY_POS_OFS(enc->buf));
+
+ /* HvNAMEUTF8 not in older perls and it would be 0 for those anyway */
#if PERL_VERSION >= 16
- srl_dump_pv(aTHX_ enc, class_name, len, HvNAMEUTF8(stash));
+ srl_dump_pv(aTHX_ enc, class_name, len, HvNAMEUTF8(stash));
#else
- srl_dump_pv(aTHX_ enc, class_name, len, 0);
+ srl_dump_pv(aTHX_ enc, class_name, len, 0);
#endif
+ }
}
}
srl_reset_snappy_header_flag(srl_encoder_t *enc)
{
/* sizeof(const char *) includes a count of \0 */
- char *flags_and_version_byte = enc->buf_start + sizeof(SRL_MAGIC_STRING) - 1;
+ char *flags_and_version_byte = enc->buf.start + sizeof(SRL_MAGIC_STRING) - 1;
/* disable snappy flag in header */
*flags_and_version_byte = SRL_PROTOCOL_ENCODING_RAW |
(*flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK);
}
void
-srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src)
+srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src)
{
enc = srl_prepare_encoder(aTHX_ enc);
- if (!SRL_ENC_HAVE_OPTION(enc, (SRL_F_COMPRESS_SNAPPY | SRL_F_COMPRESS_SNAPPY_INCREMENTAL))) {
- srl_write_header(aTHX_ enc);
+ if (expect_true( !SRL_ENC_HAVE_OPTION(enc, (SRL_F_COMPRESS_SNAPPY | SRL_F_COMPRESS_SNAPPY_INCREMENTAL)) )) {
+ srl_write_header(aTHX_ enc, user_header_src);
+ SRL_UPDATE_BODY_POS(enc);
srl_dump_sv(aTHX_ enc, src);
srl_fixup_weakrefs(aTHX_ enc);
}
/* Alas, have to write entire packet first since the header length
* will determine offsets. */
- srl_write_header(aTHX_ enc);
- sereal_header_len = BUF_POS_OFS(enc);
+ srl_write_header(aTHX_ enc, user_header_src);
+ sereal_header_len = BUF_POS_OFS(enc->buf);
+ SRL_UPDATE_BODY_POS(enc);
srl_dump_sv(aTHX_ enc, src);
srl_fixup_weakrefs(aTHX_ enc);
- assert(BUF_POS_OFS(enc) > sereal_header_len);
- uncompressed_body_length = BUF_POS_OFS(enc) - sereal_header_len;
+ assert(BUF_POS_OFS(enc->buf) > sereal_header_len);
+ uncompressed_body_length = BUF_POS_OFS(enc->buf) - sereal_header_len;
if (enc->snappy_threshold > 0
&& uncompressed_body_length < (STRLEN)enc->snappy_threshold)
srl_reset_snappy_header_flag(enc);
}
else { /* do snappy compression of body */
- char *old_buf;
+ srl_buffer_t old_buf; /* TODO can we use the enc->tmp_buf here to avoid allocations? */
char *varint_start= NULL;
- char *varint_end;
+ char *varint_end= NULL;
uint32_t dest_len;
/* Get uncompressed payload and total packet output (after compression) lengths */
srl_init_snappy_workmem(aTHX_ enc);
/* Back up old buffer and allocate new one with correct size */
- old_buf = enc->buf_start;
- Newx(enc->buf_start, dest_len, char);
- if (!enc->buf_start) {
- enc->buf_start = old_buf; /* for cleanup */
- croak("Out of memory!");
- }
- enc->pos = enc->buf_start;
- enc->buf_end = enc->buf_start + dest_len;
+ srl_buf_copy_buffer(aTHX_ &enc->buf, &old_buf);
+ srl_buf_init_buffer(aTHX_ &enc->buf, dest_len);
/* Copy Sereal header */
- Copy(old_buf, enc->pos, sereal_header_len, char);
- enc->pos += sereal_header_len;
+ Copy(old_buf.start, enc->buf.pos, sereal_header_len, char);
+ enc->buf.pos += sereal_header_len;
+ SRL_UPDATE_BODY_POS(enc); /* will do the right thing wrt. protocol V1 / V2 */
/* Embed compressed packet length */
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL ) ) {
- varint_start= enc->pos;
+ varint_start= enc->buf.pos;
srl_buf_cat_varint_nocheck(aTHX_ enc, 0, dest_len);
- varint_end= enc->pos - 1;
+ varint_end= enc->buf.pos - 1;
}
- csnappy_compress(old_buf+sereal_header_len, (uint32_t)uncompressed_body_length, enc->pos, &dest_len,
+ csnappy_compress(old_buf.start + sereal_header_len, (uint32_t)uncompressed_body_length, enc->buf.pos, &dest_len,
enc->snappy_workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO);
assert(dest_len != 0);
- /* overwrite the max size varint with the real size of the compressed data */
- if (varint_start)
- srl_update_varint_from_to(aTHX_ varint_start, varint_end, dest_len);
-
- Safefree(old_buf);
- enc->pos += dest_len;
- assert(enc->pos <= enc->buf_end);
+ /* If compression didn't help, swap back to old, uncompressed buffer */
+ if (dest_len >= uncompressed_body_length) {
+ /* swap in old, uncompressed buffer */
+ srl_buf_swap_buffer(aTHX_ &enc->buf, &old_buf);
+ /* disable snappy flag */
+ srl_reset_snappy_header_flag(enc);
+ }
+ else { /* go ahead with Snappy and do final fixups */
+ /* overwrite the max size varint with the real size of the compressed data */
+ if (varint_start)
+ srl_update_varint_from_to(aTHX_ varint_start, varint_end, dest_len);
+ enc->buf.pos += dest_len;
+ }
- /* TODO If compression didn't help, swap back to old, uncompressed buffer */
+ srl_buf_free_buffer(aTHX_ &old_buf);
+ assert(enc->buf.pos <= enc->buf.end);
} /* end of "actually do snappy compression" */
} /* end of "want snappy compression?" */
while ( NULL != (ent = PTABLE_iter_next(it)) ) {
const ptrdiff_t offset = (ptrdiff_t)ent->value;
if ( offset ) {
- char *pos = enc->buf_start + offset;
+ char *pos = enc->buf.body_pos + offset;
assert(*pos == SRL_HDR_WEAKEN);
- if (DEBUGHACK) warn("setting %lu to PAD", (long unsigned int)offset);
+ if (DEBUGHACK) warn("setting byte at offset %lu to PAD", (long unsigned int)offset);
*pos = SRL_HDR_PAD;
}
}
BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + n);
if (n < 16 && refcount == 1) {
- enc->pos--; /* backup over previous REFN */
+ enc->buf.pos--; /* backup over previous REFN */
srl_buf_cat_char_nocheck(enc, SRL_HDR_ARRAYREF + n);
} else {
/* header and num. elements */
BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + 3*n);
if (n < 16 && refcount == 1) {
- enc->pos--; /* back up over the previous REFN */
+ enc->buf.pos--; /* back up over the previous REFN */
srl_buf_cat_char_nocheck(enc, SRL_HDR_HASHREF + n);
} else {
srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_HASH, n);
* + 2*n = very conservative min size of n hashkeys if all COPY */
BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + 3*n);
if (n < 16 && refcount == 1) {
- enc->pos--; /* backup over the previous REFN */
+ enc->buf.pos--; /* backup over the previous REFN */
srl_buf_cat_char_nocheck(enc, SRL_HDR_HASHREF + n);
} else {
srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_HASH, n);
}
else {
/* remember current offset before advancing it */
- const ptrdiff_t newoffset = enc->pos - enc->buf_start;
+ const ptrdiff_t newoffset = BODY_POS_OFS(enc->buf);
PTABLE_store(string_seenhash, (void *)str, (void *)newoffset);
}
}
srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src)
{
STRLEN len;
- const char const *str= SvPV(src, len);
+ const char * const str= SvPV(src, len);
if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_DEDUPE_STRINGS) && len > 3 ) {
HV *string_deduper_hv= SRL_GET_STR_DEDUPER_HV(enc);
HE *dupe_offset_he= hv_fetch_ent(string_deduper_hv, src, 1, 0);
return;
} else {
/* start tracking this string */
- sv_setuv(ofs_sv, (UV)BUF_POS_OFS(enc));
+ sv_setuv(ofs_sv, (UV)BODY_POS_OFS(enc->buf));
}
}
}
} else {
srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_BINARY, src_len);
}
- Copy(src, enc->pos, src_len, char);
- enc->pos += src_len;
+ Copy(src, enc->buf.pos, src_len, char);
+ enc->buf.pos += src_len;
}
+
+
/* Dumps generic SVs and delegates
* to more specialized functions for RVs, etc. */
/* TODO decide when to use the IV, when to use the PV, and when
MAGIC *mg;
AV *backrefs;
SV* refsv= NULL;
+ SV* replacement= NULL;
UV weakref_ofs= 0; /* preserved between loops */
- int nobless;
SSize_t ref_rewrite_pos= 0; /* preserved between loops - note SSize_t is a perl define */
assert(src);
- nobless = SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS);
- if (++enc->recursion_depth == enc->max_recursion_depth) {
+ if (expect_false( ++enc->recursion_depth == enc->max_recursion_depth )) {
croak("Hit maximum recursion depth (%lu), aborting serialization",
(unsigned long)enc->max_recursion_depth);
}
if (DEBUGHACK) warn("backreferences %p", src);
}
#endif
- if ( mg || backrefs ) {
+ if (expect_false( mg || backrefs )) {
PTABLE_t *weak_seenhash= SRL_GET_WEAK_SEENHASH(enc);
PTABLE_ENTRY_t *pe= PTABLE_find(weak_seenhash, src);
if (!pe) {
/* we have seen it before, so we do not need to bless it again */
if (ref_rewrite_pos) {
if (DEBUGHACK) warn("ref to %p as %lu", src, (long unsigned int)oldoffset);
- enc->pos= enc->buf_start + ref_rewrite_pos;
+ enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos;
srl_buf_cat_varint(aTHX_ enc, SRL_HDR_REFP, (UV)oldoffset);
} else {
if (DEBUGHACK) warn("alias to %p as %lu", src, (long unsigned int)oldoffset);
srl_buf_cat_varint(aTHX_ enc, SRL_HDR_ALIAS, (UV)oldoffset);
}
- SRL_SET_FBIT(*(enc->buf_start + oldoffset));
+ SRL_SET_FBIT(*(enc->buf.body_pos + oldoffset));
--enc->recursion_depth;
return;
}
- if (DEBUGHACK) warn("storing %p as %lu", src, (long unsigned int)BUF_POS_OFS(enc));
- PTABLE_store(ref_seenhash, src, (void *)BUF_POS_OFS(enc));
+ if (DEBUGHACK) warn("storing %p as %lu", src, (long unsigned int)BODY_POS_OFS(enc->buf));
+ PTABLE_store(ref_seenhash, src, (void *)BODY_POS_OFS(enc->buf));
}
}
- if (weakref_ofs != 0) {
+ if (expect_false( weakref_ofs != 0 )) {
sv_dump(src);
croak("Corrupted weakref? weakref_ofs=0 (this should not happen)");
}
+ if (replacement) {
+ if (SvROK(replacement)) {
+ src= SvRV(replacement);
+ } else {
+ src= replacement;
+ }
+ replacement= NULL;
+ svt = SvTYPE(src);
+ /* plus one ensures that later on we get REFN/ARRAY and not ARRAYREF - This is horrible tho. needs to be revisited another day */
+ refcount= SvREFCNT(src) + 1;
+ /* We could, but do not do the following:*/
+ /* goto redo_dump; */
+ /* Probably a "proper" solution would, but there are nits there that I dont want to chase right now. */
+ }
if (SvPOKp(src)) {
#if defined(MODERN_REGEXP) && !defined(REGEXP_NO_LONGER_POK)
/* Only need to enter here if we have rather modern regexps, but they're
assert(referent);
}
#endif
- if (SvWEAKREF(src)) {
+ if (expect_false( SvWEAKREF(src) )) {
if (DEBUGHACK) warn("Is weakref %p", src);
- weakref_ofs= BUF_POS_OFS(enc);
+ weakref_ofs= BODY_POS_OFS(enc->buf);
srl_buf_cat_char(enc, SRL_HDR_WEAKEN);
}
- ref_rewrite_pos= BUF_POS_OFS(enc);
- if (sv_isobject(src)) {
- /* Check that we actually want to support objects */
- if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_CROAK_ON_BLESS)) ) {
- croak("Attempted to serialize blessed reference. Serializing objects "
- "using Sereal::Encoder was explicitly disabled using the "
- "'croak_on_bless' option.");
- }
- /* FIXME reuse/ref/... should INCLUDE the bless stuff. */
+
+ ref_rewrite_pos= BODY_POS_OFS(enc->buf);
+
+ if (expect_false( sv_isobject(src) )) {
/* Write bless operator with class name */
- if (!nobless)
- srl_dump_classname(aTHX_ enc, referent);
+ replacement= srl_get_frozen_object(aTHX_ enc, src, referent);
+ srl_dump_classname(aTHX_ enc, referent, replacement); /* 1 == have freeze call */
}
+
srl_buf_cat_char(enc, SRL_HDR_REFN);
refsv= src;
src= referent;
+
if (DEBUGHACK) warn("Going to redo %p", src);
goto redo_dump;
}
* want to serialize around for REFP and ALIAS output */ \
PTABLE_t *ref_seenhash= SRL_GET_REF_SEENHASH(enc); \
PTABLE_delete(ref_seenhash, src); \
- enc->pos= enc->buf_start + ref_rewrite_pos; \
+ enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos; \
} \
srl_buf_cat_char((enc), SRL_HDR_UNDEF); \
} \
* want to serialize around for REFP and ALIAS output */ \
PTABLE_t *ref_seenhash= SRL_GET_REF_SEENHASH(enc); \
PTABLE_delete(ref_seenhash, src); \
- enc->pos= enc->buf_start + ref_rewrite_pos; \
+ enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos; \
str = SvPV((refsv), len); \
} else \
str = SvPV((src), len); \
# define INITIALIZATION_SIZE 64
#endif
+#include "srl_buffer_types.h"
+
typedef struct PTABLE * ptable_ptr;
typedef struct {
- char *buf_start; /* ptr to "physical" start of output buffer */
- char *buf_end; /* ptr to end of output buffer */
- char *pos; /* ptr to current position within output buffer */
+ srl_buffer_t buf;
+ srl_buffer_t tmp_buf; /* temporary buffer for swapping */
- U32 operational_flags; /* flags that pertain to one encode run (rather than being options): See SRL_OF_* defines */
- U32 flags; /* flag-like options: See SRL_F_* defines */
- UV max_recursion_depth; /* Configurable limit on the number of recursive calls we're willing to make */
+ U32 operational_flags; /* flags that pertain to one encode run (rather than being options): See SRL_OF_* defines */
+ U32 flags; /* flag-like options: See SRL_F_* defines */
+ UV max_recursion_depth; /* Configurable limit on the number of recursive calls we're willing to make */
- UV recursion_depth; /* current Perl-ref recursion depth */
+ UV recursion_depth; /* current Perl-ref recursion depth */
ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */
ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */
ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */
+ ptable_ptr freezeobj_svhash; /* ptr table for tracking objects and their frozen replacments via FREEZE */
HV *string_deduper_hv; /* track strings we have seen before, by content */
- void *snappy_workmem; /* lazily allocated if and only if using Snappy */
- IV snappy_threshold; /* do not compress things smaller than this even if Snappy enabled */
+ void *snappy_workmem; /* lazily allocated if and only if using Snappy */
+ IV snappy_threshold; /* do not compress things smaller than this even if Snappy enabled */
+
+ /*HV *freeze_cb_cache;*/ /* cache of callbacks for FREEZE methods: classname => CV*.
+ * only used if SRL_F_ENABLE_FREEZE_SUPPORT is set. */
+ SV *sereal_string_sv; /* SV that says "Sereal" for FREEZE support */
} srl_encoder_t;
/* constructor from options */
void srl_destroy_encoder(pTHX_ srl_encoder_t *enc);
/* Write Sereal packet header to output buffer */
-void srl_write_header(pTHX_ srl_encoder_t *enc);
+void srl_write_header(pTHX_ srl_encoder_t *enc, SV *user_header_src);
/* Start dumping a top-level SV */
-void srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src);
+void srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src);
/* define option bits in srl_encoder_t's flags member */
* Corresponds to the 'no_bless_objects' flag found in the Decoder. */
#define SRL_F_NO_BLESS_OBJECTS 0x01000UL
+/* If set in flags, then we serialize using Sereal protocol version 1. */
+#define SRL_F_USE_PROTO_V1 0x02000UL
+
+/* If set in flags, then support calling FREEZE method on objects. */
+#define SRL_F_ENABLE_FREEZE_SUPPORT 0x04000UL
+
/* Set while the encoder is in active use / dirty */
#define SRL_OF_ENCODER_DIRTY 1UL
COPY | "/" | 47 | 0x2f | 0b00101111 | <OFFSET-VARINT> - copy of item defined at offset
WEAKEN | "0" | 48 | 0x30 | 0b00110000 | <REF-TAG> - Weaken the following reference
REGEXP | "1" | 49 | 0x31 | 0b00110001 | <PATTERN-STR-TAG> <MODIFIERS-STR-TAG>
- RESERVED_0 | "2" | 50 | 0x32 | 0b00110010 | reserved
- RESERVED_1 | "3" | 51 | 0x33 | 0b00110011 |
- RESERVED_2 | "4" | 52 | 0x34 | 0b00110100 |
- RESERVED_3 | "5" | 53 | 0x35 | 0b00110101 |
- RESERVED_4 | "6" | 54 | 0x36 | 0b00110110 |
- RESERVED_5 | "7" | 55 | 0x37 | 0b00110111 |
- RESERVED_6 | "8" | 56 | 0x38 | 0b00111000 |
- RESERVED_7 | "9" | 57 | 0x39 | 0b00111001 | reserved
+ OBJECT_FREEZE | "2" | 50 | 0x32 | 0b00110010 | <STR-TAG> <ITEM-TAG> - class, object-item. Need to call "THAW" method on class after decoding
+ OBJECTV_FREEZE | "3" | 51 | 0x33 | 0b00110011 | <OFFSET-VARINT> <ITEM-TAG> - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT)
+ RESERVED_0 | "4" | 52 | 0x34 | 0b00110100 | reserved
+ RESERVED_1 | "5" | 53 | 0x35 | 0b00110101 |
+ RESERVED_2 | "6" | 54 | 0x36 | 0b00110110 |
+ RESERVED_3 | "7" | 55 | 0x37 | 0b00110111 |
+ RESERVED_4 | "8" | 56 | 0x38 | 0b00111000 |
+ RESERVED_5 | "9" | 57 | 0x39 | 0b00111001 | reserved
FALSE | ":" | 58 | 0x3a | 0b00111010 | false (PL_sv_no)
TRUE | ";" | 59 | 0x3b | 0b00111011 | true (PL_sv_yes)
- MANY | "<" | 60 | 0x3c | 0b00111100 | <LEN-VARINT> <TYPE-BYTE> <TAG-DATA> - repeated tag (not done yet, will be implemented in version 2)
+ MANY | "<" | 60 | 0x3c | 0b00111100 | <LEN-VARINT> <TYPE-BYTE> <TAG-DATA> - repeated tag (not done yet, will be implemented in version 3)
PACKET_START | "=" | 61 | 0x3d | 0b00111101 | (first byte of magic string in header)
EXTEND | ">" | 62 | 0x3e | 0b00111110 | <BYTE> - for additional tags
PAD | "?" | 63 | 0x3f | 0b00111111 | (ignored tag, skip to next byte)
#define SRL_MAGIC_STRLEN 4 /* Length of SRL_MAGIC_STRING */
#define SRL_MAGIC_STRING_LILIPUTIAN 0x6c72733d /* SRL_MAGIC_STRING as a little endian integer */
-#define SRL_PROTOCOL_VERSION ( 1 ) /* this is the first. for some reason we did not use 0 */
+#define SRL_PROTOCOL_VERSION ( 2 )
#define SRL_PROTOCOL_VERSION_BITS ( 4 ) /* how many bits we use for the version, the rest go to the encoding */
#define SRL_PROTOCOL_VERSION_MASK ( ( 1 << SRL_PROTOCOL_VERSION_BITS ) - 1 )
#define SRL_PROTOCOL_ENCODING_SNAPPY ( 1 << SRL_PROTOCOL_VERSION_BITS )
#define SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL ( 2 << SRL_PROTOCOL_VERSION_BITS )
-
+/* Bits in the header bitfield */
+#define SRL_PROTOCOL_HDR_USER_DATA ( 1 )
+#define SRL_PROTOCOL_HDR_CONTINUE ( 8 ) /* TODO Describe in spec - not urgent since not meaningful yet */
/* Useful constants */
/* See also range constants below for the header byte */
#define SRL_HDR_WEAKEN ((char)48) /* <REF-TAG> - Weaken the following reference */
#define SRL_HDR_REGEXP ((char)49) /* <PATTERN-STR-TAG> <MODIFIERS-STR-TAG>*/
+#define SRL_HDR_OBJECT_FREEZE ((char)50) /* <STR-TAG> <ITEM-TAG> - class, object-item. Need to call "THAW" method on class after decoding */
+#define SRL_HDR_OBJECTV_FREEZE ((char)51) /* <OFFSET-VARINT> <ITEM-TAG> - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT) */
+
/* Note: Can do reserved check with a range now, but as we start using
* them, might have to explicit == check later. */
-#define SRL_HDR_RESERVED ((char)50) /* reserved */
-#define SRL_HDR_RESERVED_LOW ((char)50)
+#define SRL_HDR_RESERVED ((char)52) /* reserved */
+#define SRL_HDR_RESERVED_LOW ((char)52)
#define SRL_HDR_RESERVED_HIGH ((char)57)
#define SRL_HDR_FALSE ((char)58) /* false (PL_sv_no) */
#define SRL_HDR_TRUE ((char)59) /* true (PL_sv_yes) */
-#define SRL_HDR_MANY ((char)60) /* <LEN-VARINT> <TYPE-BYTE> <TAG-DATA> - repeated tag (not done yet, will be implemented in version 2) */
+#define SRL_HDR_MANY ((char)60) /* <LEN-VARINT> <TYPE-BYTE> <TAG-DATA> - repeated tag (not done yet, will be implemented in version 3) */
#define SRL_HDR_PACKET_START ((char)61) /* (first byte of magic string in header) */
sub run_tests {
my ($extra_name, $opt_hash) = @_;
+ setup_tests(2);
foreach my $bt (@BasicTests) {
my (undef, $exp, $name) = @$bt;
$name="unnamed" if not defined $name;
#next unless $name=~/PAD/;
- $exp = "$Header$exp";
+ $exp = Header(). $exp;
my $enc = Sereal::Encoder->new($opt_hash ? $opt_hash : ());
my $out;
eval{
plan skip_all => 'Did not find right version of decoder';
}
else {
- run_roundtrip_tests();
+ run_roundtrip_tests(2); # 2 is "run for proto version 2 only"
}
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Sereal::Encoder;
+use Data::Dumper;
+use File::Spec;
+
+# These tests use an installed Decoder (or respectively Encoder) to do
+# round-trip testing. There are two strategies, both with drawbacks:
+# - Test::More's is_deeply is waaaay too lenient to catch all the
+# subtleties that Sereal is supposed to encode.
+# - Serialize - Deserialize - Serialize, then do a string compare.
+# This won't catch if the first serialization has bogus output
+# but the subsequent de- & serialization work for the already
+# bogus output.
+# These tests can't replace carefully crafted manual tests, I fear.
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+ plan skip_all => 'Did not find right version of decoder';
+}
+else {
+ run_roundtrip_tests(1); # 1 is "run for proto-version 1 only"
+}
+
+
+pass();
+done_testing();
+
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Sereal::Encoder qw(:all);
+use File::Spec;
+use Scalar::Util qw( blessed );
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ref = Header(2, chr(0b0000_1100)) . chr(0b0001_0000); # -16 in body, 12 in header
+is(encode_sereal_with_header_data(-16, 12), $ref, "Encode 12 in header, -16 in body");
+is(Sereal::Encoder->new->encode(-16, 12), $ref, "OO: Encode 12 in header, -16 in body");
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+ SKIP: { skip 'Did not find right version of decoder' => 1 }
+}
+else {
+ my $dec = Sereal::Decoder->new;
+ my $encoded = encode_sereal_with_header_data(-16, 12);
+ my $decoded = $dec->decode($encoded);
+ is($decoded, -16, "-16 decoded correctly");
+ $decoded = $dec->decode_only_header($encoded);
+ is($decoded, 12, "12 decoded correctly");
+
+ my $munged = "X" . $encoded;
+ $decoded = $dec->decode_with_offset($munged, 1);
+ is($decoded, -16, "-16 decoded correctly (offset)");
+ $decoded = $dec->decode_only_header_with_offset($munged, 1);
+ is($decoded, 12, "12 decoded correctly (offset)");
+}
+
+pass("Alive at end");
+done_testing();
+
--- /dev/null
+#!perl
+use strict;
+use warnings;
+# most be loaded before Sereal::TestSet
+use Sereal::Encoder qw(encode_sereal);
+use Sereal::Encoder::Constants qw(:all);
+use File::Spec;
+use Test::More;
+use Data::Dumper;
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+
+my $thaw_called = 0;
+my $freeze_called = 0;
+
+package Foo;
+sub new {
+ my $class = shift;
+ return bless({bar => 1, @_} => $class);
+}
+
+sub FREEZE {
+ my ($self, $serializer) = @_;
+ $freeze_called = $serializer eq 'Sereal' ? 1 : 0;
+ return "frozen object", 12, [2];
+}
+
+sub THAW {
+ my ($class, $serializer, @data) = @_;
+ $thaw_called = $serializer eq 'Sereal' ? 1 : 0;
+ Test::More::is_deeply(\@data, ["frozen object", 12, [2]], "Array of frozen values roundtrips");
+
+ return Foo->new();
+}
+
+package Bar;
+sub new {
+ my $class = shift;
+ return bless({bar => 1, @_} => $class);
+}
+
+sub FREEZE {
+ my ($self, $serializer) = @_;
+ return "frozen Bar";
+}
+
+package main;
+
+my $enc = Sereal::Encoder->new({freeze_callbacks => 1});
+my $srl = $enc->encode(Foo->new());
+ok($freeze_called, "FREEZE was invoked");
+
+
+my $run_decoder_tests = have_encoder_and_decoder();
+if (not $run_decoder_tests) {
+ done_testing();
+ exit;
+}
+
+
+# Simple round-trip test
+my $dec = Sereal::Decoder->new;
+my $obj = $dec->decode($srl);
+ok(defined($obj));
+isa_ok($obj, "Foo");
+is(eval{$obj->{bar}}, 1) or diag Dumper($obj);
+
+# Test referential integrity
+my $foo = Foo->new;
+my $data = [$foo, $foo];
+$srl = $enc->encode($data);
+ok($srl =~ /frozen object/);
+
+my $out = $dec->decode($srl);
+is_deeply($out, $data, "Roundtrip works");
+
+cmp_ok($out->[0], "eq", $out->[1],
+ "Referential integrity: multiple RVs do not turn into clones")
+ or diag(Dumper($data,$out));
+
+my $barobj = Bar->new;
+$srl = $enc->encode($barobj);
+ok(not(eval {$dec->decode($srl); 1}), "Decoding without THAW barfs");
+
+
+# Multiple-object-same-class test from Christian Hansen
+
+{
+ package MyObject;
+
+ sub from_num {
+ my ($class, $num) = @_;
+ return bless { num => $num }, $class;
+ }
+
+ sub num {
+ my ($self) = @_;
+ return $self->{num};
+ }
+
+ sub FREEZE {
+ return $_[0]->num;
+ }
+
+ sub THAW {
+ my ($class, undef, $num) = @_;
+ return $class->from_num($num);
+ }
+}
+
+my @objects = map { MyObject->from_num($_) } (10, 20, 30);
+my $encoded = encode_sereal([ @objects ], { freeze_callbacks => 1 });
+my $decoded = Sereal::Decoder::decode_sereal($encoded);
+
+isa_ok($decoded, 'ARRAY');
+is(scalar @$decoded, 3, 'array has three elements');
+isa_ok($decoded->[0], 'MyObject', 'first element');
+isa_ok($decoded->[1], 'MyObject', 'second element');
+isa_ok($decoded->[2], 'MyObject', 'third element');
+
+is($decoded->[0]->num, 10, 'first MyObject->num');
+is($decoded->[1]->num, 20, 'second MyObject->num');
+is($decoded->[2]->num, 30, 'third MyObject->num');
+
+
+done_testing();
croak_on_bless => 1,
});
- is($e->encode(1), $Header.integer(1), "Encoder works before exception");
+ is($e->encode(1), Header().integer(1), "Encoder works before exception");
$ok = eval{$out = $e->encode(bless({}, "Foo")); 1};
$err = $@ || 'Zombie error';
ok(!$ok, "Object throws exception");
ok($err =~ /object/i, 'Exception refers to object');
- is($e->encode(1), $Header.integer(1), "Encoder works after exception");
+ is($e->encode(1), Header().integer(1), "Encoder works after exception");
$ok = eval {$out = $e->encode({}); 1};
ok($ok, "Non-blessed hash does not throw exception");
$ok = eval {$out = $e->encode(sub{}); 1};
$err = $@ || 'Zombie error';
ok($ok, "undef_unknown makes CODE encoding not fail");
- is($out, $Header . chr(SRL_HDR_UNDEF), "output is undef")
+ is($out, Header() . chr(SRL_HDR_UNDEF), "output is undef")
or do {
hobodecode($out) if $ENV{DEBUG_SEREAL};
}
use Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(
- $Header @BasicTests $Class $ConstClass
+ @BasicTests $Class $ConstClass
+ Header
FBIT
hobodecode
integer short_string varint array array_fbit
run_roundtrip_tests
write_test_files
$use_objectv
+ setup_tests
);
our %EXPORT_TAGS = (all => \@EXPORT_OK);
return $out;
}
-our $Header = SRL_MAGIC_STRING . chr(SRL_PROTOCOL_VERSION) . chr(0);
-
-my $ary_ref_for_repeating = [5,6];
-my $scalar_ref_for_repeating = \9;
-
-my $weak_thing; $weak_thing = [\$weak_thing, 1]; weaken($weak_thing->[0]);
-
-my $unicode1= "Ba\xDF Ba\xDF"; my $unicode2= "\x{168}nix! \x{263a}"; utf8::upgrade($unicode1); utf8::upgrade($unicode2);
-
-
-our @BasicTests = (
- # warning: this hardcodes the POS/NEG headers
- [-16, chr(0b0001_0000), "encode -16"],
- [-1, chr(0b0001_1111), "encode -1"],
- [0, chr(0b0000_0000), "encode 0"],
- [1, chr(0b0000_0001), "encode 1"],
- [15, chr(0b0000_1111), "encode 15"],
- [undef, chr(SRL_HDR_UNDEF), "encode undef"],
- ["", short_string(""), "encode empty string"],
- ["1", short_string("1"), "encode string '1'"],
- ["91a", short_string("91a"), "encode string '91a'"],
- ["abc" x 1000, chr(SRL_HDR_BINARY).varint(3000).("abc" x 1000), "long ASCII string"],
- [\1, chr(SRL_HDR_REFN).chr(0b0000_0001), "scalar ref to int"],
- [[], array(), "empty array ref"],
- [[1,2,3], array(chr(0b0000_0001), chr(0b0000_0010), chr(0b0000_0011)), "array ref"],
- [1000, chr(SRL_HDR_VARINT).varint(1000), "large int"],
- [ [1..1000],
- array(
- (map chr, (1 .. SRL_POS_MAX_SIZE)),
- (map chr(SRL_HDR_VARINT) . varint($_), ((SRL_POS_MAX_SIZE+1) .. 1000))
- ),
- "array ref with pos and varints"
- ],
-
- [{}, hash(), "empty hash ref"],
- [{foo => "baaaaar"}, hash(short_string("foo"),short_string("baaaaar")), "simple hash ref"],
- [
- [qw(foooo foooo foooo)],
- sub {
- my $opt = shift;
- if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) {
- my $d = array_head(3);
- my $pos = length($Header) + length($d);
- my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY;
- $d .= short_string("foooo") . chr($tag) . varint($pos)
- . chr($tag) . varint($pos);
- return $d;
- }
- else {
- return array(short_string("foooo"),short_string("foooo"), short_string("foooo"));
- }
- },
- "ary ref with repeated string"
- ],
- [
- [{foooo => "barrr"}, {barrr => "foooo"}],
- array(hash(short_string("foooo"), short_string("barrr")),
- hash(short_string("barrr"), short_string("foooo"))),
- "ary ref of hash refs without repeated strings"
- ],
- [
- [{foooo => "foooo"}, {foooo2 => "foooo"}],
- sub {
- my $opt = shift;
- if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) {
- my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY;
- my $d = array_head(2) . hash_head(2) . short_string("foooo");
- my $pos = length($Header) + length($d);
- $d .= short_string("foooo") . hash_head(2)
- . short_string("foooo2")
- . chr($tag) . varint($pos);
- return $d;
- }
- else {
- return array(hash(short_string("foooo"), short_string("foooo")),
- hash(short_string("foooo2"), short_string("foooo"))),
- }
- },
- "ary ref of hash refs with repeated strings"
- ],
- [$scalar_ref_for_repeating, chr(SRL_HDR_REFN).chr(0b0000_1001), "scalar ref to constant"],
- [[$scalar_ref_for_repeating, $scalar_ref_for_repeating],
- do {
- my $content = array_head(2);
- $content .= chr(SRL_HDR_REFN);
- my $pos = length($Header) + length($content);
- $content .= chr(0b1000_1001)
- .chr(SRL_HDR_REFP)
- .varint($pos)
- ;
- $content
- }, "repeated substructure (REFP): scalar ref"],
- [[$ary_ref_for_repeating, $ary_ref_for_repeating],
- do {
- my $content = array_head(2);
- my $pos = length($Header) + length($content) + 1;
- $content .= array_fbit(chr(0b0000_0101), chr(0b0000_0110))
- .chr(SRL_HDR_REFP)
- .varint($pos)
- ;
- $content
- }, "repeated substructure (REFP): array"],
- [[\$ary_ref_for_repeating, [1, $ary_ref_for_repeating]],
- do {
- my $content = array_head(2) . chr(SRL_HDR_REFN);
- my $pos = length($Header) + length($content) + 1;
- $content .= array_fbit(
- chr(0b0000_0101),
- chr(0b0000_0110)
- )
- . array(
- chr(0b0000_0001),
- chr(SRL_HDR_REFP) . varint($pos)
- )
- ;
- $content
- }, "repeated substructure (REFP): asymmetric"],
- [
- $weak_thing,
- chr(SRL_HDR_REFN) . chr(SRL_HDR_ARRAY + FBIT) . varint(2)
- .chr(SRL_HDR_PAD)
- .chr(SRL_HDR_REFN)
- .chr(SRL_HDR_REFP)
- .varint(length($Header)+1)
- .chr(0b0000_0001)
- ,
- "weak thing copy (requires PAD)"
- ],
- [
- \$weak_thing,
- chr(SRL_HDR_REFN)
- . chr(SRL_HDR_REFN + FBIT)
- . chr(SRL_HDR_ARRAY) . varint(2)
- .chr(SRL_HDR_WEAKEN) . chr(SRL_HDR_REFP) . varint(length($Header)+1)
- .chr(0b0000_0001)
- ,
- "weak thing ref"
- ],
- sub { \@_ } ->(
- $weak_thing,
- chr(SRL_HDR_REFN + FBIT)
- .chr(SRL_HDR_ARRAY).varint(2)
- .chr(SRL_HDR_WEAKEN).chr(SRL_HDR_REFP).varint(length($Header))
- .chr(0b0000_0001)
- ,
- "weak thing (aliased root)"
- ),
- [
- do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; \@array },
- do {
- my $content= array_head(2);
- my $pos= length($content);
- $content
- . chr(SRL_HDR_REFN + FBIT)
- . chr(SRL_HDR_REFP + FBIT)
- . varint(length($Header) + $pos )
- . chr(SRL_HDR_ALIAS)
- . varint(length($Header) + $pos + 1)
- },
- "scalar cross"
- ],
- [
- do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; weaken($array[1]); weaken($array[0]); \@array },
- do {
- my $content= array_head(2);
- my $pos= length($content);
- $content
- . chr(SRL_HDR_WEAKEN + FBIT)
- . chr(SRL_HDR_REFN)
- . chr(SRL_HDR_WEAKEN + FBIT)
- . chr(SRL_HDR_REFP)
- . varint(length($Header)+$pos)
- . chr(SRL_HDR_ALIAS)
- . varint(length($Header)+$pos+2)
- },
- "weak scalar cross"
- ],
- [
- bless([],"foo"),
- dump_bless(array(), "foo"),
- "bless [], 'foo' (2)"
- ],
- [
- do { my $qr= bless qr/foo/ix,"bar"; [ $qr, $qr ] },
- do {
- my $content= array_head(2);
- my $pos= length($content);
- join("", $content,
- chr(SRL_HDR_OBJECT),
- short_string("bar"),
- chr(SRL_HDR_REFN),
- chr(SRL_HDR_REGEXP + FBIT),
- short_string("foo"),
- short_string("ix"),
- chr(SRL_HDR_REFP),
- varint(length($Header) + $pos + 6 ),
- )
- },
- "blessed regexp with reuse"
- ],
- [
- do { my $o1=bless [], "foo"; my $o2=bless [], "foo"; [ $o1, $o2, $o1, $o2 ] },
- do {
- my $content= array_head(4). chr(SRL_HDR_OBJECT);
- my $pos= length($content);
- join("",$content,
- short_string("foo"),
- chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT),varint(0),
- chr( SRL_HDR_OBJECT + $use_objectv),
- $use_objectv ? () : chr(SRL_HDR_COPY), varint(length($Header) + $pos),
- chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT), varint(0),
- chr(SRL_HDR_REFP),varint(length($Header) + $pos + 5),
- chr(SRL_HDR_REFP),varint(length($Header) + $pos + 10),
- )
- },
- "blessed arrays with reuse"
- ],
- [
- [bless([], "foo"), bless([], "foo")],
- do {
- my $content = array_head(2) . chr(SRL_HDR_OBJECT);
- my $pos = length($Header) + length($content);
- $content .= short_string("foo")
- . array()
- . dump_bless( array(), \$pos )
- ;
- $content
- },
- "reused classname empty array"
- ],
- [
- bless([bless {}, "foo"], "foo"),
- do {
- my $content = chr(SRL_HDR_OBJECT);
- my $pos = length($Header) + length($content);
- $content .= short_string("foo")
- . array_head(1)
- . dump_bless(hash(), \$pos);
- ;
- $content
- },
- "wrapped objects"
- ],
- [
- qr/foo/,
- dump_bless(
- chr(SRL_HDR_REFN)
- .chr(SRL_HDR_REGEXP)
- .short_string("foo")
- .short_string(""),
- "Regexp"
- ),
- "qr/foo/"
- ],
- [
- qr/(?i-xsm:foo)/,
- dump_bless(
- chr(SRL_HDR_REFN)
- .chr(SRL_HDR_REGEXP)
- .short_string("(?i-xsm:foo)")
- .short_string(""),
- "Regexp"
- ),
- "qr/(?i-xsm:foo)/"
- ],
- [
- qr/foo/i,
- dump_bless(
+our $PROTO_VERSION;
+
+sub Header {
+ my $proto_version = shift || $PROTO_VERSION;
+ my $user_data_blob = shift;
+ my $hdr_base = SRL_MAGIC_STRING . chr($proto_version||SRL_PROTOCOL_VERSION);
+ if (defined $user_data_blob) {
+ return $hdr_base . varint(1 + length($user_data_blob)) . chr(1) . $user_data_blob;
+ }
+ else {
+ return $hdr_base . chr(0);
+ }
+}
+
+sub offset {
+ my ($str)= @_;
+ Carp::confess("no protoversion") if !defined $PROTO_VERSION;
+ if ($PROTO_VERSION >= 2) {
+ return length($str)+1;
+ } else {
+ return length($str) + length Header($PROTO_VERSION);
+ }
+}
+
+sub offseti {
+ my ( $i )= @_;
+ if ($PROTO_VERSION >= 2) {
+ return $i + 1;
+ } else {
+ return $i + length Header($PROTO_VERSION);
+ }
+}
+
+sub debug_checks {
+ my ($data_ref, $encoded_ref, $decoded_ref) = @_;
+ if (defined $ENV{DEBUG_SEREAL}) {
+ note("Original data was: " . Data::Dumper::Dumper($$data_ref)) if defined $data_ref;
+ note("Encoded data is: " . (defined($$encoded_ref) ? $$encoded_ref : "<undef>")) if defined $encoded_ref;
+ note("Decoded data was: " . Data::Dumper::Dumper($$decoded_ref)) if defined $decoded_ref;
+ }
+ if (defined $ENV{DEBUG_DUMP}) {
+ Dump($$encoded_ref) if defined $encoded_ref;
+ Dump($$decoded_ref) if defined $decoded_ref;
+ }
+ if (defined $ENV{DEBUG_HOBO}) {
+ hobodecode($$encoded_ref) if defined $encoded_ref;
+ }
+ exit() if $ENV{DEBUG_FAIL_FATAL};
+}
+
+our @BasicTests;
+sub setup_tests {
+ my ($proto_version)=@_;
+ $PROTO_VERSION= $proto_version if defined $proto_version;
+ my $ary_ref_for_repeating = [5,6];
+ my $scalar_ref_for_repeating = \9;
+
+ my $weak_thing; $weak_thing = [\$weak_thing, 1]; weaken($weak_thing->[0]);
+
+ my $unicode1= "Ba\xDF Ba\xDF"; my $unicode2= "\x{168}nix! \x{263a}"; utf8::upgrade($unicode1); utf8::upgrade($unicode2);
+
+
+ @BasicTests = (
+ # warning: this hardcodes the POS/NEG headers
+ [-16, chr(0b0001_0000), "encode -16"],
+ [-1, chr(0b0001_1111), "encode -1"],
+ [0, chr(0b0000_0000), "encode 0"],
+ [1, chr(0b0000_0001), "encode 1"],
+ [15, chr(0b0000_1111), "encode 15"],
+ [undef, chr(SRL_HDR_UNDEF), "encode undef"],
+ ["", short_string(""), "encode empty string"],
+ ["1", short_string("1"), "encode string '1'"],
+ ["91a", short_string("91a"), "encode string '91a'"],
+ ["abc" x 1000, chr(SRL_HDR_BINARY).varint(3000).("abc" x 1000), "long ASCII string"],
+ [\1, chr(SRL_HDR_REFN).chr(0b0000_0001), "scalar ref to int"],
+ [[], array(), "empty array ref"],
+ [[1,2,3], array(chr(0b0000_0001), chr(0b0000_0010), chr(0b0000_0011)), "array ref"],
+ [1000, chr(SRL_HDR_VARINT).varint(1000), "large int"],
+ [ [1..1000],
+ array(
+ (map chr, (1 .. SRL_POS_MAX_SIZE)),
+ (map chr(SRL_HDR_VARINT) . varint($_), ((SRL_POS_MAX_SIZE+1) .. 1000))
+ ),
+ "array ref with pos and varints"
+ ],
+
+ [{}, hash(), "empty hash ref"],
+ [{foo => "baaaaar"}, hash(short_string("foo"),short_string("baaaaar")), "simple hash ref"],
+ [
+ [qw(foooo foooo foooo)],
+ sub {
+ my $opt = shift;
+ if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) {
+ my $d = array_head(3);
+ my $pos = offset($d);
+ my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY;
+ $d .= short_string("foooo") . chr($tag) . varint($pos)
+ . chr($tag) . varint($pos);
+ return $d;
+ }
+ else {
+ return array(short_string("foooo"),short_string("foooo"), short_string("foooo"));
+ }
+ },
+ "ary ref with repeated string"
+ ],
+ [
+ [{foooo => "barrr"}, {barrr => "foooo"}],
+ array(hash(short_string("foooo"), short_string("barrr")),
+ hash(short_string("barrr"), short_string("foooo"))),
+ "ary ref of hash refs without repeated strings"
+ ],
+ [
+ [{foooo => "foooo"}, {foooo2 => "foooo"}],
+ sub {
+ my $opt = shift;
+ if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) {
+ my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY;
+ my $d = array_head(2) . hash_head(2) . short_string("foooo");
+ my $pos = offset($d);
+ $d .= short_string("foooo") . hash_head(2)
+ . short_string("foooo2")
+ . chr($tag) . varint($pos);
+ return $d;
+ }
+ else {
+ return array(hash(short_string("foooo"), short_string("foooo")),
+ hash(short_string("foooo2"), short_string("foooo"))),
+ }
+ },
+ "ary ref of hash refs with repeated strings"
+ ],
+ [$scalar_ref_for_repeating, chr(SRL_HDR_REFN).chr(0b0000_1001), "scalar ref to constant"],
+ [[$scalar_ref_for_repeating, $scalar_ref_for_repeating],
+ do {
+ my $content = array_head(2);
+ $content .= chr(SRL_HDR_REFN);
+ my $pos = offset($content);
+ $content .= chr(0b1000_1001)
+ .chr(SRL_HDR_REFP)
+ .varint($pos)
+ ;
+ $content
+ }, "repeated substructure (REFP): scalar ref"],
+ [[$ary_ref_for_repeating, $ary_ref_for_repeating],
+ do {
+ my $content = array_head(2);
+ my $pos = offset($content) + 1;
+ $content .= array_fbit(chr(0b0000_0101), chr(0b0000_0110))
+ .chr(SRL_HDR_REFP)
+ .varint($pos)
+ ;
+ $content
+ }, "repeated substructure (REFP): array"],
+ [[\$ary_ref_for_repeating, [1, $ary_ref_for_repeating]],
+ do {
+ my $content = array_head(2) . chr(SRL_HDR_REFN);
+ my $pos = offset($content) + 1;
+ $content .= array_fbit(
+ chr(0b0000_0101),
+ chr(0b0000_0110)
+ )
+ . array(
+ chr(0b0000_0001),
+ chr(SRL_HDR_REFP) . varint($pos)
+ )
+ ;
+ $content
+ }, "repeated substructure (REFP): asymmetric"],
+ [
+ $weak_thing,
+ chr(SRL_HDR_REFN)
+ . chr(SRL_HDR_ARRAY + FBIT) . varint(2)
+ . chr(SRL_HDR_PAD) . chr(SRL_HDR_REFN)
+ . chr(SRL_HDR_REFP) . varint(offseti(1))
+ . chr(0b0000_0001)
+ ,
+ "weak thing copy (requires PAD)"
+ ],
+ [
+ \$weak_thing,
chr(SRL_HDR_REFN)
- .chr(SRL_HDR_REGEXP)
- .short_string("foo")
- .short_string("i"),
- "Regexp"
+ . chr(SRL_HDR_REFN + FBIT)
+ . chr(SRL_HDR_ARRAY) . varint(2)
+ .chr(SRL_HDR_WEAKEN) . chr(SRL_HDR_REFP) . varint(offseti(1))
+ .chr(0b0000_0001)
+ ,
+ "weak thing ref"
+ ],
+ sub { \@_ } ->(
+ $weak_thing,
+ chr(SRL_HDR_REFN + FBIT)
+ .chr(SRL_HDR_ARRAY).varint(2)
+ .chr(SRL_HDR_WEAKEN).chr(SRL_HDR_REFP).varint(offseti(0))
+ .chr(0b0000_0001)
+ ,
+ "weak thing (aliased root)"
),
- "qr/foo/i"
- ],
- [
- [{foo => 1}, {foo => 2}],
- sub {
- my $opt = shift;
- if ($opt->{no_shared_hashkeys}) {
- return array(
- hash(
- short_string("foo"),
- integer(1),
- ),
- hash(
- short_string("foo"),
- integer(2),
- ),
- );
- }
- else {
+ [
+ do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; \@array },
+ do {
my $content= array_head(2);
- return join(
- "",
- $content,
- hash(
- short_string("foo"),
- integer(1),
- ),
- hash(
- chr(SRL_HDR_COPY) . varint(length($Header)+length($content)+1),
- integer(2),
- ),
+ my $pos= offset($content);
+ $content
+ . chr(SRL_HDR_REFN + FBIT)
+ . chr(SRL_HDR_REFP + FBIT)
+ . varint( $pos )
+ . chr(SRL_HDR_ALIAS)
+ . varint($pos + 1)
+ },
+ "scalar cross"
+ ],
+ [
+ do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; weaken($array[1]); weaken($array[0]); \@array },
+ do {
+ my $content= array_head(2);
+ my $pos= offset($content);
+ $content
+ . chr(SRL_HDR_WEAKEN + FBIT)
+ . chr(SRL_HDR_REFN)
+ . chr(SRL_HDR_WEAKEN + FBIT)
+ . chr(SRL_HDR_REFP)
+ . varint($pos)
+ . chr(SRL_HDR_ALIAS)
+ . varint($pos+2)
+ },
+ "weak scalar cross"
+ ],
+ [
+ bless([],"foo"),
+ dump_bless(array(), "foo"),
+ "bless [], 'foo' (2)"
+ ],
+ [
+ do { my $qr= bless qr/foo/ix,"bar"; [ $qr, $qr ] },
+ do {
+ my $content= array_head(2);
+ my $pos= offset($content);
+ join("", $content,
+ chr(SRL_HDR_OBJECT),
+ short_string("bar"),
+ chr(SRL_HDR_REFN),
+ chr(SRL_HDR_REGEXP + FBIT),
+ short_string("foo"),
+ short_string("ix"),
+ chr(SRL_HDR_REFP),
+ varint($pos + 6 ),
)
- }
- },
- "duplicate hash keys"
- ],
- [
- { $unicode1 => $unicode2 },
- hash(
- chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode1)) . encode_utf8($unicode1),
- chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode2)) . encode_utf8($unicode2),
- ),
- "simple unicode hash key and value"
- ],
- [
- sub { \@_ }->(!1,!0),
- array(chr(SRL_HDR_FALSE),chr(SRL_HDR_TRUE)),
- "true/false"
- ]
-);
-
+ },
+ "blessed regexp with reuse"
+ ],
+ [
+ do { my $o1=bless [], "foo"; my $o2=bless [], "foo"; [ $o1, $o2, $o1, $o2 ] },
+ do {
+ my $content= array_head(4). chr(SRL_HDR_OBJECT);
+ my $pos= offset($content);
+ join("",$content,
+ short_string("foo"),
+ chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT),varint(0),
+ chr( SRL_HDR_OBJECT + $use_objectv),
+ $use_objectv ? () : chr(SRL_HDR_COPY), varint($pos),
+ chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT), varint(0),
+ chr(SRL_HDR_REFP),varint($pos + 5),
+ chr(SRL_HDR_REFP),varint($pos + 10),
+ )
+ },
+ "blessed arrays with reuse"
+ ],
+ [
+ [bless([], "foo"), bless([], "foo")],
+ do {
+ my $content = array_head(2) . chr(SRL_HDR_OBJECT);
+ my $pos = offset($content);
+ $content .= short_string("foo")
+ . array()
+ . dump_bless( array(), \$pos )
+ ;
+ $content
+ },
+ "reused classname empty array"
+ ],
+ [
+ bless([bless {}, "foo"], "foo"),
+ do {
+ my $content = chr(SRL_HDR_OBJECT);
+ my $pos = offset($content);
+ $content .= short_string("foo")
+ . array_head(1)
+ . dump_bless(hash(), \$pos);
+ ;
+ $content
+ },
+ "wrapped objects"
+ ],
+ [
+ qr/foo/,
+ dump_bless(
+ chr(SRL_HDR_REFN)
+ .chr(SRL_HDR_REGEXP)
+ .short_string("foo")
+ .short_string(""),
+ "Regexp"
+ ),
+ "qr/foo/"
+ ],
+ [
+ qr/(?i-xsm:foo)/,
+ dump_bless(
+ chr(SRL_HDR_REFN)
+ .chr(SRL_HDR_REGEXP)
+ .short_string("(?i-xsm:foo)")
+ .short_string(""),
+ "Regexp"
+ ),
+ "qr/(?i-xsm:foo)/"
+ ],
+ [
+ qr/foo/i,
+ dump_bless(
+ chr(SRL_HDR_REFN)
+ .chr(SRL_HDR_REGEXP)
+ .short_string("foo")
+ .short_string("i"),
+ "Regexp"
+ ),
+ "qr/foo/i"
+ ],
+ [
+ [{foo => 1}, {foo => 2}],
+ sub {
+ my $opt = shift;
+ if ($opt->{no_shared_hashkeys}) {
+ return array(
+ hash(
+ short_string("foo"),
+ integer(1),
+ ),
+ hash(
+ short_string("foo"),
+ integer(2),
+ ),
+ );
+ }
+ else {
+ my $content= array_head(2);
+ return join(
+ "",
+ $content,
+ hash(
+ short_string("foo"),
+ integer(1),
+ ),
+ hash(
+ chr(SRL_HDR_COPY) . varint(offset($content)+1),
+ integer(2),
+ ),
+ )
+ }
+ },
+ "duplicate hash keys"
+ ],
+ [
+ { $unicode1 => $unicode2 },
+ hash(
+ chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode1)) . encode_utf8($unicode1),
+ chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode2)) . encode_utf8($unicode2),
+ ),
+ "simple unicode hash key and value"
+ ],
+ [
+ sub { \@_ }->(!1,!0),
+ array(chr(SRL_HDR_FALSE),chr(SRL_HDR_TRUE)),
+ "true/false"
+ ]
+ );
+}
sub get_git_top_dir {
return();
};
my $cmp_v = $need_class->VERSION;
+ $cmp_v =~ s/_//;
+ $cmp_v = sprintf("%.2f", int($cmp_v*100)/100);
if (not defined $cmp_v or not exists $compat_versions{$cmp_v}) {
note("Could not load correct version of $need_class for testing "
."(got: $cmp_v, needed any of ".join(", ", keys %compat_versions).")");
sub run_roundtrip_tests {
- for my $opt (
- ['plain', { } ],
- ['snappy', { snappy => 1 } ],
- ['snappy_incr', { snappy_incr => 1 } ],
- ['sort_keys', { sort_keys => 1 } ],
- ['dedupe_strings', { dedupe_strings => 1 } ],
- ) {
- run_roundtrip_tests_internal(@$opt);
+ my ($proto_version) = @_;
+ my @proto_versions = ($proto_version ? ($proto_version) : qw(2 1));
+
+ for my $proto_version ($proto_version) {
+ my $suffix = $proto_version == 1 ? "_v1" : "";
+
+ for my $opt (
+ ['plain', { } ],
+ ['snappy', { snappy => 1 } ],
+ ['snappy_incr', { snappy_incr => 1 } ],
+ ['sort_keys', { sort_keys => 1 } ],
+ ['dedupe_strings', { dedupe_strings => 1 } ],
+ ['freeze/thaw', { freeze_callbacks => 1 } ],
+ ) {
+ my ($name, $opts) = @$opt;
+ $name .= $suffix;
+ $opts->{use_protocol_v1} = 1 if $proto_version == 1;
+ $PROTO_VERSION= $proto_version;
+ setup_tests();
+ run_roundtrip_tests_internal($name, $opts);
+ }
}
}
sub run_roundtrip_tests_internal {
- my ($ename, $opt) = @_;
+ my ($ename, $opt, $encode_decode_callbacks) = @_;
my $decoder = Sereal::Decoder->new($opt);
my $encoder = Sereal::Encoder->new($opt);
['object-oriented',
sub {$encoder->encode(shift)},
sub {$decoder->decode(shift)}],
+ ['header-body',
+ sub {$encoder->encode(shift, 123456789)}, # header data is abitrary to stand out for debugging
+ sub {$decoder->decode(shift)}],
+ ['header-only',
+ sub {$encoder->encode(987654321, shift)}, # body data is abitrary to stand out for debugging
+ sub {$decoder->decode_only_header(shift)}],
)
{
my ($mname, $enc, $dec) = @$meth;
+ next if $mname =~ /header/ and $opt->{use_protocol_v1};
foreach my $rt (@RoundtripTests) {
my ($name, $data) = @$rt;
- my $encoded = $enc->($data);
+ my $encoded;
+ eval {$encoded = $enc->($data); 1}
+ or do {
+ my $err = $@ || 'Zombie error';
+ diag("Got error while encoding: $err");
+ };
ok(defined $encoded, "$name ($ename, $mname, encoded defined)")
or do {
- if (defined $ENV{DEBUG_SEREAL}) {
- note("Data was: " . Data::Dumper::Dumper($data));
- note("Output was: " . (defined($encoded) ? $encoded : "<undef>"));
- }
+ debug_checks(\$data, \$encoded, undef);
next;
};
- my $decoded= $dec->($encoded);
+ my $decoded;
+ eval {$decoded = $dec->($encoded); 1}
+ or do {
+ my $err = $@ || 'Zombie error';
+ diag("Got error while decoding: $err");
+ };
ok( defined($decoded) == defined($data), "$name ($ename, $mname, decoded definedness)")
- or next;
- my $encoded2 = $enc->($decoded);
+ or do {
+ debug_checks(\$data, \$encoded, undef);
+ next;
+ };
+
+ # Second roundtrip
+ my $encoded2;
+ eval {$encoded2 = $enc->($decoded); 1}
+ or do {
+ my $err = $@ || 'Zombie error';
+ diag("Got error while encoding the second time: $err");
+ };
ok(defined $encoded2, "$name ($ename, $mname, encoded2 defined)")
- or next;
- my $decoded2 = $dec->($encoded2);
+ or do {
+ debug_checks(\$data, \$encoded, \$decoded);
+ next;
+ };
+
+ my $decoded2;
+ eval {$decoded2 = $dec->($encoded2); 1}
+ or do {
+ my $err = $@ || 'Zombie error';
+ diag("Got error while encoding the second time: $err");
+ };
+
ok(defined($decoded2) == defined($data), "$name ($ename, $mname, decoded2 defined)")
- or next;
+ or next;
is_deeply($decoded, $data, "$name ($ename, $mname, decoded vs data)")
- or do {
- if ($ENV{DEBUG_DUMP}) {
- Dump($decoded);
- Dump($data);
- }
- };
+ or do {
+ debug_checks(\$data, \$encoded2, \$decoded2);
+ };
is_deeply($decoded2, $data, "$name ($ename, $mname, decoded2 vs data)")
- or do {
- if ($ENV{DEBUG_DUMP}) {
- Dump($decoded2);
- Dump($data);
- }
- };
+ or do {
+ debug_checks(\$data, \$encoded2, \$decoded2);
+ };
is_deeply($decoded, $decoded2, "$name ($ename, $mname, decoded vs decoded2)")
- or do {
- if ($ENV{DEBUG_DUMP}) {
- Dump($decoded);
- Dump($decoded2);
- }
- };
-
+ or do {
+ debug_checks(\$data, \$encoded2, \$decoded2);
+ };
+
if (0) {
# It isnt really safe to test this way right now. The exact output
# of two runs of Sereal is not guaranteed to be the same due to the effect of
$ret = is_string($encoded2, $encoded, "$name ($ename, $mname, encoded2 vs encoded)");
}
$ret or do {
- if ($ENV{DEBUG_DUMP}) {
- Dump($decoded);
- Dump($data);
- } elsif ($ENV{DEBUG_HOBO}) {
- open my $pipe,"| perl -Mblib=../Encoder/blib -Mblib=../Decoder/blib author_tools/hobodecoder.pl -e"
- or die "Dead: $!";
- print $pipe $encoded;
- close $pipe;
- open $pipe,"| perl -Mblib=../Encoder/blib -Mblib=../Decoder/blib author_tools/hobodecoder.pl -e"
- or die "Dead: $!";
- print $pipe $encoded2;
- close $pipe;
- }
+ debug_checks(\$data, \$encoded, \$decoded);
};
}
}
my $make_data_file_name = sub {File::Spec->catfile($dir, sprintf("test_data_%05u", shift))};
my $make_name_file_name = sub {File::Spec->catfile($dir, sprintf("test_name_%05u", shift))};
+ setup_tests();
foreach my $testno (1..@BasicTests) {
my $t = $BasicTests[$testno-1];
my $data = ref($t->[1]) eq 'CODE' ? $t->[1]->() : $t->[1];
- _write_file($make_data_file_name->($testno), $Header.$data);
+ _write_file($make_data_file_name->($testno), Header($PROTO_VERSION).$data);
_write_file($make_name_file_name->($testno), $t->[2] . "\n");
}