From: Alexandre Mestiashvili Date: Fri, 10 Jan 2014 09:57:42 +0000 (+0100) Subject: Imported Upstream version 2.03 X-Git-Tag: archive/raspbian/4.017+ds-1+rpi1~1^2~3^2~32 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=450feee35687ccf25121f650a23989bbfda649cf;p=libsereal-encoder-perl.git Imported Upstream version 2.03 --- diff --git a/Changes b/Changes index 2c2a378..23f012b 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,56 @@ Revision history for Perl extension Sereal-Encoder +* Warning: For a seamless upgrade, upgrade to version 2 +* of the decoder before upgrading to version 2 of the +* encoder! + +2.03 Tue Jan 7 20:00 2014 (AMS time) + - (Hopefully) final fixes to FREEZE/THAW functionality: + => Add safe assertion to make sure that we don't segfault on invalid + data. + => Fix encoding/decoding of data structures with repeated references + to the same instance of a class that has FREEZE/THAW hooks. + Thanks to Christian Hansen for a test case. + - Distribution dependency fix. + +2.02 Mon Jan 6 15:00 2014 (AMS time) + - Fundamental fixes for FREEZE/THAW support in previous Sereal v2 + releases. If you plan to use FREEZE/THAW, make sure you have 2.02 + or better (dito for the decoder). + +2.01 Tue Dec 31 08:15 2013 (AMS time) + - Promoting changes from 0.37 to 2.00_03 to a stable release. + (This being the first protocol v2 stable release.) + - Minor performance tweaks. + +2.00_03 Sun Dec 29 10:33 2013 (AMS time) + - FREEZE/THAW hooks for object serialization. + - Test improvements (allowing for partial parallel run) + - Minor optimizations. + +2.00_02 Mon Oct 28 19:32 2013 (AMS time) + - Sereal::Encoder now requires Sereal::Decoder for better testing. + - Fix Test::Warn dependency problem of 2.00_01. + +2.00_01 tue Oct 1 07:34 2013 (AMS time) + - NEW PROTOCOL VERSION: V2 + - User-data in header functionality: You may embed arbitrary + Sereal-serializable data in a document header. The document + header isn't compressed, so this is ideal for retrieving + small chunks of meta-data (eg. routing information) without + having to deserialize the entire document. + - Relocatable Sereal document bodies + - Encoder never emits non-incremental Snappy encoding for V2 + - Offsets now 1-based in relocatable format, not 0 + - Fixed VERY obscure (and rare) memory leak. + - Improved error messages + - Remove warning about Sereal not being production-grade + (because it IS). + - Detect when the Snappy compression was net negative in size + and back out + - C89/Windows fixes (bulk88) + - 5.18 compat: Skip test failing due to hash-randomization (Zefram) + 0.37 Mon Sep 2 07:40 2013 (AMS time) - Windows and C89 fixes - Band-aid: Skip test failing due to hash-randomization [Zefram] diff --git a/Encoder.xs b/Encoder.xs index f2369d8..a366199 100644 --- a/Encoder.xs +++ b/Encoder.xs @@ -36,17 +36,20 @@ DESTROY(enc) srl_destroy_encoder(aTHX_ enc); void -encode(enc, src) +encode(enc, src, ...) srl_encoder_t *enc; SV *src; + SV *hdr_user_data_src = NULL; PPCODE: assert(enc != NULL); - srl_dump_data_structure(aTHX_ enc, src); - assert(enc->pos > enc->buf_start); + if (items >= 2 && SvOK(ST(2))) + hdr_user_data_src = ST(2); + srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src); + assert(enc->buf.pos > enc->buf.start); /* We always copy the string since we might reuse the string buffer. That means * we already have to do a malloc and we might as well use the opportunity to * allocate only as much memory as we really need to hold the output. */ - ST(0) = sv_2mortal(newSVpvn(enc->buf_start, (STRLEN)BUF_POS_OFS(enc))); + ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); XSRETURN(1); void @@ -58,27 +61,59 @@ encode_sereal(src, opt = NULL) PPCODE: enc = srl_build_encoder_struct(aTHX_ opt); assert(enc != NULL); - srl_dump_data_structure(aTHX_ enc, src); + srl_dump_data_structure(aTHX_ enc, src, NULL); /* Avoid copy by stealing string buffer if it is not too large. * This makes sense in the functional interface since the string * buffer isn't ever going to be reused. */ - assert(enc->buf_start < enc->pos); - if (BUF_POS_OFS(enc) > 20 && BUF_SPACE(enc) < BUF_POS_OFS(enc) ) { + assert(enc->buf.start < enc->buf.pos); + if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) { /* If not wasting more than 2x memory - FIXME fungible */ SV *sv = sv_2mortal(newSV_type(SVt_PV)); ST(0) = sv; - SvPV_set(sv, enc->buf_start); - SvLEN_set(sv, BUF_SIZE(enc)); - SvCUR_set(sv, BUF_POS_OFS(enc)); + SvPV_set(sv, enc->buf.start); + SvLEN_set(sv, BUF_SIZE(enc->buf)); + SvCUR_set(sv, BUF_POS_OFS(enc->buf)); SvPOK_on(sv); - enc->buf_start = enc->pos = NULL; /* no need to free these guys now */ + enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */ } else { - ST(0) = sv_2mortal(newSVpvn(enc->buf_start, (STRLEN)BUF_POS_OFS(enc))); + ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); } XSRETURN(1); +void +encode_sereal_with_header_data(src, hdr_user_data_src, opt = NULL) + SV *src; + SV *hdr_user_data_src; + HV *opt; + PREINIT: + srl_encoder_t *enc; + PPCODE: + if (!SvOK(hdr_user_data_src)) + hdr_user_data_src = NULL; + enc = srl_build_encoder_struct(aTHX_ opt); + assert(enc != NULL); + srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src); + /* Avoid copy by stealing string buffer if it is not too large. + * This makes sense in the functional interface since the string + * buffer isn't ever going to be reused. */ + assert(enc->buf.start < enc->buf.pos); + if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) { + /* If not wasting more than 2x memory - FIXME fungible */ + SV *sv = sv_2mortal(newSV_type(SVt_PV)); + ST(0) = sv; + SvPV_set(sv, enc->buf.start); + SvLEN_set(sv, BUF_SIZE(enc->buf)); + SvCUR_set(sv, BUF_POS_OFS(enc->buf)); + SvPOK_on(sv); + + enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */ + } + else { + ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); + } + XSRETURN(1); MODULE = Sereal::Encoder PACKAGE = Sereal::Encoder::Constants PROTOTYPES: DISABLE diff --git a/MANIFEST b/MANIFEST index 190a106..7f6d2d3 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,4 +1,5 @@ author_tools/bench.pl +author_tools/freeze_thaw_timing.pl author_tools/hobodecoder.pl author_tools/numeric_str_length.c author_tools/stringify_test.c @@ -23,6 +24,7 @@ snappy/csnappy_decompress.c snappy/csnappy_internal.h snappy/csnappy_internal_userspace.h srl_buffer.h +srl_buffer_types.h srl_common.h srl_encoder.c srl_encoder.h @@ -35,7 +37,10 @@ t/010_desperate.t t/020_sort_keys.t t/021_sort_keys_option.t t/100_roundtrip.t +t/101_roundtrip_v1.t t/110_nobless.t +t/120_hdr_data.t +t/130_freezethaw.t t/160_recursion.t t/200_bulk.t t/300_fail.t diff --git a/META.json b/META.json index d671f20..b5e53f9 100644 --- a/META.json +++ b/META.json @@ -28,6 +28,7 @@ "File::Path" : "0", "File::Spec" : "0", "Scalar::Util" : "0", + "Sereal::Decoder" : "2.03", "Test::LongString" : "0", "Test::More" : "0.88", "Test::Warn" : "0" @@ -54,5 +55,5 @@ "url" : "git://github.com/Sereal/Sereal.git" } }, - "version" : "0.37" + "version" : "2.03" } diff --git a/META.yml b/META.yml index 99a6669..917c859 100644 --- a/META.yml +++ b/META.yml @@ -9,6 +9,7 @@ build_requires: File::Path: 0 File::Spec: 0 Scalar::Util: 0 + Sereal::Decoder: 2.03 Test::LongString: 0 Test::More: 0.88 Test::Warn: 0 @@ -31,4 +32,4 @@ requires: resources: bugtracker: https://github.com/Sereal/Sereal/issues repository: git://github.com/Sereal/Sereal.git -version: 0.37 +version: 2.03 diff --git a/Makefile.PL b/Makefile.PL index 2c5fd33..0c8aa9e 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -67,6 +67,7 @@ WriteMakefile1( 'Test::LongString' => '0', 'Data::Dumper' => '0', 'Test::Warn' => '0', + 'Sereal::Decoder' => '2.03', }, NAME => $module, VERSION_FROM => 'lib/Sereal/Encoder.pm', # finds $VERSION diff --git a/author_tools/bench.pl b/author_tools/bench.pl index 4904b83..3039b7f 100644 --- a/author_tools/bench.pl +++ b/author_tools/bench.pl @@ -10,6 +10,7 @@ use Data::Undump qw(undump); use Data::Dumper qw(Dumper); use Data::Dumper::Limited qw(DumpLimited); use Data::MessagePack; +use CBOR::XS qw(encode_cbor decode_cbor); use Getopt::Long qw(GetOptions); my ( @@ -64,13 +65,13 @@ push @str, substr($chars, int(rand(int(length($chars)/2+1))), 10) for 1..1000; my @rand = map rand, 1..1000; our %data; -$data{$_}= make_data() for qw(sereal sereal_func dd1 dd2 ddl mp json_xs storable sereal_snappy); +$data{$_}= make_data() for qw(sereal sereal_func dd1 dd2 ddl mp json_xs storable sereal_snappy cbor); our $enc = Sereal::Encoder->new(\%opt); our $enc_snappy = Sereal::Encoder->new({%opt, snappy => 1}); our $dec = Sereal::Decoder->new(\%opt); -our ($json_xs, $dd1, $dd2, $ddl, $sereal, $storable, $mp, $sereal_snappy); +our ($json_xs, $dd1, $dd2, $ddl, $sereal, $storable, $mp, $sereal_snappy, $cbor); # do this first before any of the other dumpers "contaminate" the iv/pv issue $sereal = $enc->encode($data{sereal}); $sereal_snappy = $enc_snappy->encode($data{sereal_snappy}); @@ -80,7 +81,8 @@ if (!SEREAL_ONLY) { $dd2 = Dumper($data{dd2}); $ddl = DumpLimited($data{ddl}) if !$medium_data or $nobless; $mp = $mpo->pack($data{mp}) if !$medium_data or $nobless; - $storable = nfreeze($data{storable}); # must be last + $cbor = encode_cbor($data{cbor}) if !$medium_data or $nobless; + $storable = nfreeze($data{storable}); } print($sereal), exit if $dump; @@ -93,6 +95,7 @@ if (!SEREAL_ONLY) { ["JSON::XS", bytes::length($json_xs)], ["Data::Dumper::Limited", bytes::length($ddl)], ["Data::MessagePack", bytes::length($mp)], + ["CBOR", bytes::length($cbor)], )), ["Data::Dumper (1)", bytes::length($dd1)], ["Data::Dumper (2)", bytes::length($dd2)], @@ -118,6 +121,7 @@ if ($encoder) { json_xs => '$::x = encode_json($::data{json_xs});', ddl => '$::x = DumpLimited($::data{ddl});', msgpack => '$::x = $::mpo->pack($::data{mp});', + cbor => '$::x = encode_cbor($::data{cbor});', )), dd_noindent => '$::x = Data::Dumper->new([$::data{dd1}])->Indent(0)->Dump();', dd => '$::x = Dumper($::data{dd2});', @@ -140,6 +144,7 @@ if ($decoder) { json_xs => '$::x = decode_json($::json_xs);', undump_ddl => '$::x = Data::Undump::undump($::ddl);', msgpack => '$::x = $::mpo->unpack($::mp);', + cbor => '$::x = decode_cbor($::cbor);', )), eval_dd => '$::x = eval $::dd1;', storable => '$::x = thaw($::storable);', diff --git a/author_tools/freeze_thaw_timing.pl b/author_tools/freeze_thaw_timing.pl new file mode 100644 index 0000000..7064634 --- /dev/null +++ b/author_tools/freeze_thaw_timing.pl @@ -0,0 +1,86 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Sereal::Encoder; +use Sereal::Decoder; + +use Benchmark::Dumb qw(cmpthese); + +my $enc_nocb = Sereal::Encoder->new(); +my $enc_cb = Sereal::Encoder->new({freeze_callbacks => 1}); +my $dec = Sereal::Decoder->new(); + +package Foo; +sub new { + my $class = shift; + return bless({@_} => $class); +} + +sub FREEZE { + my ($self, $serializer) = @_; + return $self->{name}; # performance +} + +sub THAW { + my ($class, $serializer, $data) = @_; + return Foo->new(name => $data); +} + +package main; + +my $data = Foo->new(name => "blargh"); +my $data_big = []; +for (1..100) { + push @$data_big, Foo->new(name => "blargh"); +} +my $data_big_nocb = []; +for (1..100) { + push @$data_big_nocb, bless({name => "blargh"} => "Bar"); +} + +my $frozen_nocb = $enc_nocb->encode($data); +my $frozen_cb = $enc_cb->encode($data); + +my $frozen_big_nocb = $enc_nocb->encode($data_big); +my $frozen_big_cb = $enc_cb->encode($data_big); + +my $timing = "1000.01"; + +print "Comparing small serialization with/out callbacks...\n"; +cmpthese( + $timing, + { + cb => sub {$enc_cb->encode($data)}, + no_cb => sub {$enc_nocb->encode($data)}, + } +); + +print "Comparing big serialization with/out callbacks...\n"; +cmpthese( + $timing, + { + cb => sub {$enc_cb->encode($data_big)}, + no_cb => sub {$enc_nocb->encode($data_big)}, + cb_nocbdata => sub {$enc_cb->encode($data_big_nocb)}, + no_cb_nocbdata => sub {$enc_nocb->encode($data_big_nocb)}, + } +); + + +print "Comparing small deserialization with/out callbacks...\n"; +cmpthese( + $timing, + { + cb => sub {$dec->decode($frozen_cb)}, + no_cb => sub {$dec->decode($frozen_nocb)}, + } +); + +print "Comparing big deserialization with/out callbacks...\n"; +cmpthese( + $timing, + { + cb => sub {$dec->decode($frozen_big_cb)}, + no_cb => sub {$dec->decode($frozen_big_nocb)}, + } +); diff --git a/author_tools/hobodecoder.pl b/author_tools/hobodecoder.pl index 87fb11d..0c0f935 100644 --- a/author_tools/hobodecoder.pl +++ b/author_tools/hobodecoder.pl @@ -15,34 +15,11 @@ BEGIN { ' } or die "No encoder/decoder constants: $err\n$@"; } -GetOptions( - my $opt = {}, - 'e|stderr', -); - -$| = 1; -if ($opt->{e}) { - select(STDERR); -} - -my %const_names = map {$_ => eval "$_"} @Sereal::Constants::EXPORT_OK; -#print Dumper \%const_names; exit; - -local $/ = undef; -my $data = ; - -open my $fh, "| od -tu1c" or die $!; -print $fh $data; -close $fh; - -print "\n\nTotal length: " . length($data) . "\n\n"; - -my $indent = ""; my $done; -parse_header(); -while (length $data) { - my $done = parse_sv(""); -} +my $data; +my $hlen; +my $indent = ""; +my %const_names = map {$_ => eval "$_"} @Sereal::Constants::EXPORT_OK; sub parse_header { $data =~ s/^(=srl)(.)// or die "invalid header: $data"; @@ -55,6 +32,14 @@ sub parse_header { print "Sereal protocol version: $proto_version\n"; if (length($hdr)) { print "Header($len): " . join(" ", map ord, split //, $hdr) . "\n"; + if ($proto_version >= 2 && (ord(substr($hdr, 0, 1)) & 1) ) { # if first bit set => user header data + print "Found user data in header:\n"; + my $tmp_data = $data; # dance necessary because $data is treated as a global :( hobo, hobo, hobo! + $data = substr($hdr, 1); + parse_sv(" "); + $data = $tmp_data; + print "End of user data in header. Body:\n"; + } } else { print "Empty Header.\n"; @@ -67,11 +52,15 @@ sub parse_header { my $out = Compress::Snappy::decompress($data); $data = $out; } elsif ($encoding == SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL) { - die "Incremental Snappy encoding not implemented in hobodecoder. (yet. It's easy to do.)"; - } - elsif ($encoding) { + print "Header says: Document body is Snappy-compressed (incremental).\n"; + my $compressed_len = varint(); + require Compress::Snappy; + my $out = Compress::Snappy::decompress($data); + $data = $out; + } elsif ($encoding) { die "Invalid encoding '" . ($encoding >> SRL_PROTOCOL_VERSION_BITS) . "'"; } + $hlen= length($done); } my ($len_f, $len_d, $len_D); @@ -95,6 +84,9 @@ sub parse_long_double { return unpack("D",$v); } +my $fmt1= "%06u/%06u: %02x%1s %03s %s"; +my $fmt2= "%-6s %-6s %-2s%1s %-3s %s"; +my $lead_items= 5; # 1 less than the fmt2 sub parse_sv { my ($ind) = @_; @@ -105,106 +97,122 @@ sub parse_sv { my $bv= $o; my $high = $o > 128; $o -= 128 if $high; + printf $fmt1, $p, $p-$hlen+1, $o, $high ? '*' : ' ', $bv, $ind; + if ($o == SRL_HDR_VARINT) { - printf "%06u: %02x %03s %sVARINT: %u\n", $p, $o, $bv, $ind, varint(); + printf "VARINT: %u\n", varint(); } elsif (SRL_HDR_POS_LOW <= $o && $o <= SRL_HDR_POS_HIGH) { - printf "%06u: %02x %03s %sPOS: %u\n", $p, $o, $bv, $ind, $o; + printf "POS: %u\n", $o; } elsif (SRL_HDR_NEG_LOW <= $o && $o <= SRL_HDR_NEG_HIGH) { $o = $o - 32; - printf "%06u: %02x %03s %sNEG: %i\n", $p, $o, $bv, $ind, $o; + printf "NEG: %i\n", $o; } elsif ($o >= SRL_HDR_SHORT_BINARY_LOW) { $o -= SRL_HDR_SHORT_BINARY_LOW; my $len = $o; my $str = substr($data, 0, $len, ''); $done .= $str; - printf "%06u: %02x %03s %sSHORT_BINARY(%u): '%s'\n", $p, $o, $bv, $ind, $len, $str; + printf "SHORT_BINARY(%u): '%s'\n", $len, $str; } elsif ($o == SRL_HDR_BINARY || $o == SRL_HDR_STR_UTF8) { my $l = varint(); my $str = substr($data, 0, $l, ""); # fixme UTF8 $done .= $str; - printf "%06u: %02x %03s %s".($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $p, $o, $bv, $ind, $l, $str; + printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $l, $str); } elsif ($o == SRL_HDR_FLOAT) { - printf "%06u: %02x %03s %sFLOAT(%f)\n", $p, $o, $bv, $ind, parse_float(); + printf "FLOAT(%f)\n", parse_float(); } elsif ($o == SRL_HDR_DOUBLE) { - printf "%06u: %02x %03s %sDOUBLE(%f)\n", $p, $o, $bv, $ind, parse_double(); + printf "DOUBLE(%f)\n", parse_double(); } elsif ($o == SRL_HDR_LONG_DOUBLE) { - printf "%06u: %02x %03s %sLONG_DOUBLE(%f)\n", $p, $o, $bv, $ind, parse_long_double(); + printf "LONG_DOUBLE(%f)\n", parse_long_double(); } elsif ($o == SRL_HDR_REFN) { - printf "%06u: %02x %03s %sREFN\n", $p, $o, $bv, $ind; + printf "REFN\n"; parse_sv($ind . " "); } elsif ($o == SRL_HDR_REFP) { my $len = varint(); - printf "%06u: %02x %03s %sREFP(%u)\n", $p, $o, $bv, $ind, $len; + printf "REFP(%u)\n", $len; } elsif ($o == SRL_HDR_COPY) { my $len = varint(); - printf "%06u: %02x %03s %sCOPY(%u)\n", $p, $o, $bv, $ind, $len; + printf "COPY(%u)\n", $len; } elsif (SRL_HDR_ARRAYREF_LOW <= $o && $o <= SRL_HDR_ARRAYREF_HIGH) { - printf "%06u: %02x %03s %sARRAYREF", $p, $o, $bv, $ind; + printf "ARRAYREF"; parse_av($ind,$o); } elsif ($o == SRL_HDR_ARRAY) { - printf "%06u: %02x %03s %sARRAY", $p, $o, $bv, $ind; + printf "ARRAY"; parse_av($ind); } elsif (SRL_HDR_HASHREF_LOW <= $o && $o <= SRL_HDR_HASHREF_HIGH) { - printf "%06u: %02x %03s %sHASHREF", $p, $o, $bv, $ind; + printf "HASHREF"; parse_hv($ind,$o); } elsif ($o == SRL_HDR_HASH) { - printf "%06u: %02x %03s %sHASH", $p, $o, $bv, $ind; + printf "HASH"; parse_hv($ind); } elsif ($o == SRL_HDR_UNDEF) { - printf "%06u: %02x %03s %sUNDEF\n", $p, $o, $bv, $ind; + printf "UNDEF\n"; } elsif ($o == SRL_HDR_WEAKEN) { - printf "%06u: %02x %03s %sWEAKEN\n", $p, $o, $bv, $ind; + printf "WEAKEN\n"; parse_sv($ind); } elsif ($o == SRL_HDR_PAD) { - printf "%06u: %02x %03s %sPAD\n", $p, $o, $bv, $ind; + printf "PAD\n"; } elsif ($o == SRL_HDR_ALIAS) { my $ofs= varint(); - printf "%06u: %02x %03s %sALIAS(%u)\n", $p, $o, $bv, $ind, $ofs; + printf "ALIAS(%u)\n", $ofs; } elsif ($o == SRL_HDR_OBJECTV) { my $ofs= varint(); - printf "%06u: %02x %03s %sOBJECTV(%d)\n", $p, $o, $bv, $ind, $ofs; - printf "%6s %2s %3s %s Value:\n",("") x 3, $ind." "; + printf "OBJECTV(%d)\n", $ofs; + printf "$fmt2 Value:\n",("") x $lead_items, $ind; + parse_sv($ind." "); + } + elsif ($o == SRL_HDR_OBJECTV_FREEZE) { + my $ofs= varint(); + printf "OBJECTV_FREEZE(%d)\n", $ofs; + printf "$fmt2 Value:\n",("") x $lead_items, $ind; parse_sv($ind." "); } elsif ($o == SRL_HDR_OBJECT) { - printf "%06u: %02x %03s %sOBJECT\n", $p, $o, $bv, $ind; - printf "%6s %2s %3s %s Class:\n",("") x 3, $ind." "; + printf "OBJECT\n"; + printf "$fmt2 Class:\n",("") x $lead_items, $ind; parse_sv($ind." "); - printf "%6s %2s %3s %s Value:\n",("") x 3, $ind." "; + printf "$fmt2 Value:\n",("") x $lead_items, $ind; + parse_sv($ind." "); + } + elsif ($o == SRL_HDR_OBJECT_FREEZE) { + printf "OBJECT_FREEZE\n"; + printf "$fmt2 Class:\n",("") x $lead_items, $ind; + parse_sv($ind." "); + printf "$fmt2 Value:\n",("") x $lead_items, $ind; parse_sv($ind." "); } elsif ($o == SRL_HDR_REGEXP) { - printf "%06u: %02x %03s %sREGEXP\n", $p, $o, $bv, $ind; + printf "REGEXP\n"; parse_sv($ind." "); parse_sv($ind." "); } elsif ($o == SRL_HDR_FALSE) { - printf "%06u: %02x %03s %sFALSE\n", $p, $o, $bv, $ind; + printf "FALSE\n"; } elsif ($o == SRL_HDR_TRUE) { - printf "%06u: %02x %03s %sTRUE\n", $p, $o, $bv, $ind; + printf "TRUE\n"; } else { + printf "\n"; die "unsupported type: $o ($t): $const_names{$o}"; } return 0; @@ -257,3 +265,29 @@ sub varint { } return $x; } + +GetOptions( + my $opt = {}, + 'e|stderr', +); + +$| = 1; +if ($opt->{e}) { + select(STDERR); +} + +#print Dumper \%const_names; exit; + +local $/ = undef; +$data = ; + +open my $fh, "| od -tu1c" or die $!; +print $fh $data; +close $fh; + +print "\n\nTotal length: " . length($data) . "\n\n"; + +parse_header(); +while (length $data) { + $done = parse_sv(""); +} diff --git a/const-c.inc b/const-c.inc index 705fc3f..df00f43 100644 --- a/const-c.inc +++ b/const-c.inc @@ -537,15 +537,70 @@ constant_20 (pTHX_ const char *name, IV *iv_return) { return PERL_constant_NOTFOUND; } +static int +constant_21 (pTHX_ const char *name, IV *iv_return) { + /* When generated this function returned values for the list of names given + here. However, subsequent manual editing may have added or removed some. + SRL_HDR_ARRAYREF_HIGH SRL_HDR_OBJECT_FREEZE SRL_HDR_RESERVED_HIGH */ + /* Offset 13 gives the best switch position. */ + switch (name[13]) { + case 'R': + if (memEQ(name, "SRL_HDR_ARRAYREF_HIGH", 21)) { + /* ^ */ +#ifdef SRL_HDR_ARRAYREF_HIGH + *iv_return = SRL_HDR_ARRAYREF_HIGH; + return PERL_constant_ISIV; +#else + return PERL_constant_NOTDEF; +#endif + } + break; + case 'T': + if (memEQ(name, "SRL_HDR_OBJECT_FREEZE", 21)) { + /* ^ */ +#ifdef SRL_HDR_OBJECT_FREEZE + *iv_return = SRL_HDR_OBJECT_FREEZE; + return PERL_constant_ISIV; +#else + return PERL_constant_NOTDEF; +#endif + } + break; + case 'V': + if (memEQ(name, "SRL_HDR_RESERVED_HIGH", 21)) { + /* ^ */ +#ifdef SRL_HDR_RESERVED_HIGH + *iv_return = SRL_HDR_RESERVED_HIGH; + return PERL_constant_ISIV; +#else + return PERL_constant_NOTDEF; +#endif + } + break; + } + return PERL_constant_NOTFOUND; +} + static int constant_25 (pTHX_ const char *name, IV *iv_return) { /* When generated this function returned values for the list of names given here. However, subsequent manual editing may have added or removed some. SRL_HDR_SHORT_BINARY_HIGH SRL_MASK_SHORT_BINARY_LEN - SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_VERSION_BITS - SRL_PROTOCOL_VERSION_MASK */ + SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_HDR_CONTINUE + SRL_PROTOCOL_VERSION_BITS SRL_PROTOCOL_VERSION_MASK */ /* Offset 24 gives the best switch position. */ switch (name[24]) { + case 'E': + if (memEQ(name, "SRL_PROTOCOL_HDR_CONTINU", 24)) { + /* E */ +#ifdef SRL_PROTOCOL_HDR_CONTINUE + *iv_return = SRL_PROTOCOL_HDR_CONTINUE; + return PERL_constant_ISIV; +#else + return PERL_constant_NOTDEF; +#endif + } + break; case 'H': if (memEQ(name, "SRL_HDR_SHORT_BINARY_HIG", 24)) { /* H */ @@ -628,6 +683,7 @@ my @names = (qw(SRL_HDR_ALIAS SRL_HDR_ARRAY SRL_HDR_ARRAYREF SRL_HDR_FLOAT SRL_HDR_HASH SRL_HDR_HASHREF SRL_HDR_HASHREF_HIGH SRL_HDR_HASHREF_LOW SRL_HDR_LONG_DOUBLE SRL_HDR_MANY SRL_HDR_NEG SRL_HDR_NEG_HIGH SRL_HDR_NEG_LOW SRL_HDR_OBJECT SRL_HDR_OBJECTV + SRL_HDR_OBJECTV_FREEZE SRL_HDR_OBJECT_FREEZE SRL_HDR_PACKET_START SRL_HDR_PAD SRL_HDR_POS SRL_HDR_POS_HIGH SRL_HDR_POS_LOW SRL_HDR_REFN SRL_HDR_REFP SRL_HDR_REGEXP SRL_HDR_RESERVED SRL_HDR_RESERVED_HIGH SRL_HDR_RESERVED_LOW @@ -639,8 +695,10 @@ my @names = (qw(SRL_HDR_ALIAS SRL_HDR_ARRAY SRL_HDR_ARRAYREF SRL_MASK_SHORT_BINARY_LEN SRL_NEG_MIN_SIZE SRL_POS_MAX_SIZE SRL_PROTOCOL_ENCODING_MASK SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_ENCODING_SNAPPY - SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL SRL_PROTOCOL_VERSION - SRL_PROTOCOL_VERSION_BITS SRL_PROTOCOL_VERSION_MASK)); + SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL + SRL_PROTOCOL_HDR_CONTINUE SRL_PROTOCOL_HDR_USER_DATA + SRL_PROTOCOL_VERSION SRL_PROTOCOL_VERSION_BITS + SRL_PROTOCOL_VERSION_MASK)); print constant_types(), "\n"; # macro defs foreach (C_constant ("Sereal::Encoder::Constants", 'constant', 'IV', $types, undef, 3, @names) ) { @@ -713,15 +771,18 @@ __END__ return constant_20 (aTHX_ name, iv_return); break; case 21: - /* Names all of length 21. */ - /* SRL_HDR_ARRAYREF_HIGH SRL_HDR_RESERVED_HIGH */ - /* Offset 10 gives the best switch position. */ - switch (name[10]) { + return constant_21 (aTHX_ name, iv_return); + break; + case 22: + /* Names all of length 22. */ + /* SRL_HDR_OBJECTV_FREEZE SRL_MASK_HASHREF_COUNT */ + /* Offset 6 gives the best switch position. */ + switch (name[6]) { case 'R': - if (memEQ(name, "SRL_HDR_ARRAYREF_HIGH", 21)) { - /* ^ */ -#ifdef SRL_HDR_ARRAYREF_HIGH - *iv_return = SRL_HDR_ARRAYREF_HIGH; + if (memEQ(name, "SRL_HDR_OBJECTV_FREEZE", 22)) { + /* ^ */ +#ifdef SRL_HDR_OBJECTV_FREEZE + *iv_return = SRL_HDR_OBJECTV_FREEZE; return PERL_constant_ISIV; #else return PERL_constant_NOTDEF; @@ -729,10 +790,10 @@ __END__ } break; case 'S': - if (memEQ(name, "SRL_HDR_RESERVED_HIGH", 21)) { - /* ^ */ -#ifdef SRL_HDR_RESERVED_HIGH - *iv_return = SRL_HDR_RESERVED_HIGH; + if (memEQ(name, "SRL_MASK_HASHREF_COUNT", 22)) { + /* ^ */ +#ifdef SRL_MASK_HASHREF_COUNT + *iv_return = SRL_MASK_HASHREF_COUNT; return PERL_constant_ISIV; #else return PERL_constant_NOTDEF; @@ -741,16 +802,6 @@ __END__ break; } break; - case 22: - if (memEQ(name, "SRL_MASK_HASHREF_COUNT", 22)) { -#ifdef SRL_MASK_HASHREF_COUNT - *iv_return = SRL_MASK_HASHREF_COUNT; - return PERL_constant_ISIV; -#else - return PERL_constant_NOTDEF; -#endif - } - break; case 23: if (memEQ(name, "SRL_MASK_ARRAYREF_COUNT", 23)) { #ifdef SRL_MASK_ARRAYREF_COUNT @@ -775,13 +826,32 @@ __END__ return constant_25 (aTHX_ name, iv_return); break; case 26: - if (memEQ(name, "SRL_PROTOCOL_ENCODING_MASK", 26)) { + /* Names all of length 26. */ + /* SRL_PROTOCOL_ENCODING_MASK SRL_PROTOCOL_HDR_USER_DATA */ + /* Offset 24 gives the best switch position. */ + switch (name[24]) { + case 'S': + if (memEQ(name, "SRL_PROTOCOL_ENCODING_MASK", 26)) { + /* ^ */ #ifdef SRL_PROTOCOL_ENCODING_MASK - *iv_return = SRL_PROTOCOL_ENCODING_MASK; - return PERL_constant_ISIV; + *iv_return = SRL_PROTOCOL_ENCODING_MASK; + return PERL_constant_ISIV; #else - return PERL_constant_NOTDEF; + return PERL_constant_NOTDEF; #endif + } + break; + case 'T': + if (memEQ(name, "SRL_PROTOCOL_HDR_USER_DATA", 26)) { + /* ^ */ +#ifdef SRL_PROTOCOL_HDR_USER_DATA + *iv_return = SRL_PROTOCOL_HDR_USER_DATA; + return PERL_constant_ISIV; +#else + return PERL_constant_NOTDEF; +#endif + } + break; } break; case 27: diff --git a/inc/Sereal/BuildTools.pm b/inc/Sereal/BuildTools.pm index a5aac0a..3f67d3e 100644 --- a/inc/Sereal/BuildTools.pm +++ b/inc/Sereal/BuildTools.pm @@ -61,7 +61,7 @@ sub generate_constant_includes { open my $fh, "<", "srl_protocol.h" or die $!; my (@string_const, @int_const); while (<$fh>) { - if (/^#\s*define\s*(SRL_\w+)\s*(.*)$/) { + if (/^#\s*define\s*(SRL_\w+)\s*(.*?)(?:\/\*|$)/) { my ($name, $value) = ($1, $2); next if $name =~ /_H_$/ or $name =~ /SET/ or $value =~ /"/; push @int_const, $name; diff --git a/lib/Sereal/Encoder.pm b/lib/Sereal/Encoder.pm index 58ee05c..a636541 100644 --- a/lib/Sereal/Encoder.pm +++ b/lib/Sereal/Encoder.pm @@ -5,14 +5,15 @@ use warnings; use Carp qw/croak/; use XSLoader; -our $VERSION = '0.37'; # Don't forget to update the TestCompat set for testing against installed decoders! +our $VERSION = '2.03'; # Don't forget to update the TestCompat set for testing against installed decoders! # not for public consumption, just for testing. -my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 23 .. int($VERSION * 100) ) ]; # compat with 0.23 to ... +(my $num_version = $VERSION) =~ s/_//; +my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 200 .. int($num_version * 100) ) ]; # compat with 2.00 to ... sub _test_compat {return(@$TestCompat, $VERSION)} use Exporter 'import'; -our @EXPORT_OK = qw(encode_sereal); +our @EXPORT_OK = qw(encode_sereal encode_sereal_with_header_data); our %EXPORT_TAGS = (all => \@EXPORT_OK); # export by default if run from command line our @EXPORT = ((caller())[1] eq '-e' ? @EXPORT_OK : ()); @@ -48,7 +49,7 @@ Its sister module L implements a decoder for this format. The two are released separately to allow for independent and safer upgrading. The Sereal protocol version emitted by this encoder implementation is currently -protocol version 1. +protocol version 2 by default. The protocol specification and many other bits of documentation can be found in the github repository. Right now, the specification is at @@ -69,15 +70,6 @@ encoder. Currently, the following options are recognized, none of them are on by default. -=head3 no_shared_hashkeys - -When the C option is set ot a true value, then -the encoder will disable the detection and elimination of repeated hash -keys. This only has an effect for serializing structures containing hashes. -By skipping the detection of repeated hash keys, performance goes up a bit, -but the size of the output can potentially be much larger. -Do not disable this unless you have a reason to. - =head3 snappy If set, the main payload of the Sereal document will be compressed using @@ -88,13 +80,18 @@ If in doubt, test with your data whether this helps or not. The decoder (version 0.04 and up) will know how to handle Snappy-compressed Sereal documents transparently. -B Do not use this if you want to parse multiple Sereal packets -from the same buffer. Instead use C instead. +B The C and C options are identical in +Sereal protocol V2 (the default). If using the C option +to emit Sereal V1 documents, this emits non-incrementally decodable +documents. See C in those cases. =head3 snappy_incr -Enables a version of the snappy protocol which is suitable for incremental -parsing of packets. See also the C option above for more details. +Same as the C option for default (Sereal V2) operation. + +In Sereal V1, enables a version of the snappy protocol which is suitable for +incremental parsing of packets. See also the C option above for +more details. =head3 snappy_threshold @@ -102,6 +99,8 @@ The size threshold (in bytes) of the uncompressed output below which snappy compression is not even attempted even if enabled. Defaults to one kilobyte (1024 bytes). Set to 0 and C to enabled to always compress. +Note that the document will not be compressed if the resulting size +will be bigger than the original size (even if snappy_threshold is 0). =head3 croak_on_bless @@ -116,6 +115,21 @@ See also C to skip the blessing of objects. When both flags are set, C has a higher precedence then C. +=head3 freeze_callbacks + +This option is new in Sereal v2 and needs a Sereal v2 decoder. + +If this option is set, the encoder will check for and possibly invoke +the C method on any object in the input data. An object that +was serialized using its C method will have its corresponding +C class method called during deserialization. The exact semantics +are documented below under L. + +Beware that using this functionality means a significant slowdown for +object serialization. Even when serializing objects without a C +method, the additional method look up will cost a small amount of runtime. +Yes, C is so fast that is may make a difference. + =head3 no_bless_objects If this option is set, then the encoder will serialize blessed references @@ -189,6 +203,16 @@ the memory. See L for why you might want to use this, and for the various caveats involved. +=head3 no_shared_hashkeys + +When the C option is set ot a true value, then +the encoder will disable the detection and elimination of repeated hash +keys. This only has an effect for serializing structures containing hashes. +By skipping the detection of repeated hash keys, performance goes up a bit, +but the size of the output can potentially be much larger. + +Do not disable this unless you have a reason to. + =head3 dedupe_strings If this is option is enabled/true then Sereal will use a hash to encode duplicates @@ -199,12 +223,14 @@ significant reduction in the size of the encoded form. Currently only strings longer than 3 characters will be deduped, however this may change in the future. Note that Sereal will perform certain types of deduping automatically even -without this option. In particular class names and hash keys are deduped +without this option. In particular class names and hash keys (see also the +C setting) are deduped regardless of this option. Only enable this if you have good reason to believe that there are many duplicated strings as values in your data structure. -Use of this option does not require an upgraded decoder. The deduping +Use of this option does not require an upgraded decoder (this option was added in +Sereal::Encoder 0.32). The deduping is performed in such a way that older decoders should handle it just fine. In other words, the output of a Sereal B should not depend on whether this option was used during B. See also below: @@ -226,9 +252,15 @@ when decoding. The upshot is that with this option, the application using (decoding) the data may save a lot of memory in some situations but at the cost of potential action at a distance due to the aliasing. -Beware: The test suite currently does not cover this option as well as it +I The test suite currently does not cover this option as well as it probably should. Patches welcome. +=head3 use_protocol_v1 + +If set, the encoder will emit Sereal documents following protocol version 1. +This is strongly discouraged except for temporary +compatibility/migration purposes. + =head1 INSTANCE METHODS =head2 encode @@ -256,6 +288,70 @@ F and F programs that are part of this distribution. Suffice to say that this library is easily competitive in both time and space efficiency with the best alternatives. +=head1 FREEZE/THAW CALLBACK MECHANISM + +This mechanism is enabled using the C option of the encoder. +It is inspired by the equivalent mechanism in L and differs only +in one minor detail, explained below. The general mechanism is documented +in the I section of L. +Similar to CBOR using C, Sereal uses the string C as a serializer +identifier for the callbacks. + +The one difference to the mechanism as supported by CBOR is that in Sereal, +the C callback must return a single value. That value can be any +data structure supported by Sereal (hopefully without causing infinite recursion +by including the original object). But C can't return a list as with CBOR. +This should not be any practical limitation whatsoever. Just return an array +reference instead of a list. + +Here is a contrived example of a class implementing the C / C mechansim. + + package + File; + + use Moo; + + has 'path' => (is => 'ro'); + has 'fh' => (is => 'rw'); + + # open file handle if necessary and return it + sub get_fh { + my $self = shift; + # This could also with fancier Moo(se) syntax + my $fh = $self->fh; + if (not $fh) { + open $fh, "<", $self->path or die $!; + $self->fh($fh); + } + return $fh; + } + + sub FREEZE { + my ($self, $serializer) = @_; + # Could switch on $serializer here: JSON, CBOR, Sereal, ... + # But this case is so simple that it will work with ALL of them. + # Do not try to serialize our file handle! Path will be enough + # to recreate. + return $self->path; + } + + sub THAW { + my ($class, $serializer, $data) = @_; + # Turn back into object. + return $class->new(path => $data); + } + +Why is the C/C mechanism important here? Our contrived C +class may contain a file handle which can't be serialized. So C not +only returns just the path (which is more compact than encoding the actual +object contents), but it strips the file handle which can be lazily reopened +on the other side of the serialization/deserialization pipe. +But this example also shows that a naive implementation can easily end up +with subtle bugs. A file handle itself has state (position in file, etc). +Thus the deserialization in the above example won't accurately reproduce +the original state. It can't, of course, if it's deserialized in a different +environment anyway. + =head1 THREAD-SAFETY C is thread-safe on Perl's 5.8.7 and higher. This means @@ -353,7 +449,20 @@ which is simply serializing a cache key, and thus there's little harm in an occasional false-negative, but think carefully before applying Sereal in other use-cases. -=head1 AUTHOR +=head1 BUGS, CONTACT AND SUPPORT + +For reporting bugs, please use the github bug tracker at +L. + +For support and discussion of Sereal, there are two Google Groups: + +Announcements around Sereal (extremely low volume): +L + +Sereal development list: +L + +=head1 AUTHORS Yves Orton Edemerphq@gmail.comE @@ -382,8 +491,8 @@ their gratitude. =head1 COPYRIGHT AND LICENSE -Copyright (C) 2012, 2013 by Steffen Mueller -Copyright (C) 2012, 2013 by Yves Orton +Copyright (C) 2012, 2013, 2014 by Steffen Mueller +Copyright (C) 2012, 2013, 2014 by Yves Orton The license for the code in this distribution is the following, with the exceptions listed below: diff --git a/lib/Sereal/Encoder/Constants.pm b/lib/Sereal/Encoder/Constants.pm index d0d03df..3fb8bc5 100644 --- a/lib/Sereal/Encoder/Constants.pm +++ b/lib/Sereal/Encoder/Constants.pm @@ -18,6 +18,8 @@ BEGIN { @EXPORT_OK = qw( SRL_PROTOCOL_ENCODING_RAW SRL_PROTOCOL_ENCODING_SNAPPY SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL + SRL_PROTOCOL_HDR_USER_DATA + SRL_PROTOCOL_HDR_CONTINUE SRL_POS_MAX_SIZE SRL_NEG_MIN_SIZE SRL_HDR_POS @@ -44,6 +46,8 @@ BEGIN { @EXPORT_OK = qw( SRL_HDR_COPY SRL_HDR_WEAKEN SRL_HDR_REGEXP + SRL_HDR_OBJECT_FREEZE + SRL_HDR_OBJECTV_FREEZE SRL_HDR_RESERVED SRL_HDR_RESERVED_LOW SRL_HDR_RESERVED_HIGH diff --git a/ptable.h b/ptable.h index a51ffcd..5536b33 100644 --- a/ptable.h +++ b/ptable.h @@ -45,6 +45,12 @@ #define PTABLE_HASH(ptr) ptr_hash(PTR2nat(ptr)) +#define PTABLE_FLAG_AUTOCLEAN 1 + +typedef struct PTABLE_entry PTABLE_ENTRY_t; +typedef struct PTABLE PTABLE_t; +typedef struct PTABLE_iter PTABLE_ITER_t; + struct PTABLE_entry { struct PTABLE_entry *next; void *key; @@ -55,6 +61,7 @@ struct PTABLE { struct PTABLE_entry **tbl_ary; UV tbl_max; UV tbl_items; + PTABLE_ITER_t *cur_iter; /* one iterator at a time can be auto-freed */ }; struct PTABLE_iter { @@ -63,10 +70,6 @@ struct PTABLE_iter { struct PTABLE_entry *cur_entry; }; -typedef struct PTABLE_entry PTABLE_ENTRY_t; -typedef struct PTABLE PTABLE_t; -typedef struct PTABLE_iter PTABLE_ITER_t; - STATIC PTABLE_t * PTABLE_new(void); STATIC PTABLE_t * PTABLE_new_size(const U8 size_base2_exponent); @@ -76,9 +79,11 @@ STATIC void PTABLE_store(PTABLE_t *tbl, void *key, void *value); STATIC void PTABLE_delete(PTABLE_t *tbl, void *key); STATIC void PTABLE_grow(PTABLE_t *tbl); STATIC void PTABLE_clear(PTABLE_t *tbl); +STATIC void PTABLE_clear_dec(pTHX_ PTABLE_t *tbl); STATIC void PTABLE_free(PTABLE_t *tbl); STATIC PTABLE_ITER_t * PTABLE_iter_new(PTABLE_t *tbl); +STATIC PTABLE_ITER_t * PTABLE_iter_new_flags(PTABLE_t *tbl, int flags); STATIC PTABLE_ENTRY_t * PTABLE_iter_next(PTABLE_ITER_t *iter); STATIC void PTABLE_iter_free(PTABLE_ITER_t *iter); @@ -96,6 +101,7 @@ PTABLE_new_size(const U8 size_base2_exponent) Newxz(tbl, 1, PTABLE_t); tbl->tbl_max = (1 << size_base2_exponent) - 1; tbl->tbl_items = 0; + tbl->cur_iter = NULL; Newxz(tbl->tbl_ary, tbl->tbl_max + 1, PTABLE_ENTRY_t*); return tbl; } @@ -207,6 +213,36 @@ PTABLE_clear(PTABLE_t *tbl) } } +STATIC void +PTABLE_clear_dec(pTHX_ PTABLE_t *tbl) +{ + if (tbl && tbl->tbl_items) { + register PTABLE_ENTRY_t * * const array = tbl->tbl_ary; + UV riter = tbl->tbl_max; + + do { + PTABLE_ENTRY_t *entry = array[riter]; + + while (entry) { + PTABLE_ENTRY_t * const oentry = entry; + entry = entry->next; + if (oentry->value) + SvREFCNT_dec((SV*)(oentry->value)); + Safefree(oentry); + } + + /* chocolateboy 2008-01-08 + * + * make sure we clear the array entry, so that subsequent probes fail + */ + + array[riter] = NULL; + } while (riter--); + + tbl->tbl_items = 0; + } +} + /* remove one entry from a ptr table */ STATIC void @@ -243,10 +279,15 @@ PTABLE_delete(PTABLE_t *tbl, void *key) STATIC void PTABLE_free(PTABLE_t *tbl) { - if (!tbl) { + if (!tbl) return; - } + PTABLE_clear(tbl); + if (tbl->cur_iter) { + PTABLE_ITER_t *it = tbl->cur_iter; + tbl->cur_iter = NULL; /* avoid circular checks */ + PTABLE_iter_free(it); + } Safefree(tbl->tbl_ary); Safefree(tbl); } @@ -271,12 +312,21 @@ PTABLE_free(PTABLE_t *tbl) /* Create new iterator object */ STATIC PTABLE_ITER_t * PTABLE_iter_new(PTABLE_t *tbl) +{ + return PTABLE_iter_new_flags(tbl, 0); +} + +STATIC PTABLE_ITER_t * +PTABLE_iter_new_flags(PTABLE_t *tbl, int flags) { PTABLE_ITER_t *iter; Newx(iter, 1, PTABLE_ITER_t); iter->table = tbl; iter->bucket_num = 0; iter->cur_entry = NULL; + + if (flags & PTABLE_FLAG_AUTOCLEAN) + tbl->cur_iter = iter; if (tbl->tbl_items == 0) { /* Prevent hash bucket scanning. * This can be a significant optimization on large, empty hashes. */ @@ -302,8 +352,23 @@ PTABLE_iter_next(PTABLE_ITER_t *iter) STATIC void PTABLE_iter_free(PTABLE_ITER_t *iter) { + /* If we're the iterator that can be auto-cleaned by the PTABLE, + * then unregister. */ + if (iter->table->cur_iter == iter) + iter->table->cur_iter = NULL; + Safefree(iter); } +STATIC void +PTABLE_debug_dump(PTABLE_t *tbl, void (*func)(PTABLE_ENTRY_t *e)) +{ + PTABLE_ENTRY_t *e; + PTABLE_ITER_t *iter = PTABLE_iter_new(tbl); + while (NULL != (e = PTABLE_iter_next(iter))) { + func(e); + } + PTABLE_iter_free(iter); +} #endif diff --git a/snappy/csnappy_compress.c b/snappy/csnappy_compress.c index 4f9c07a..abc103b 100644 --- a/snappy/csnappy_compress.c +++ b/snappy/csnappy_compress.c @@ -380,12 +380,12 @@ EmitCopyLessThan64(char *op, int offset, int len) if ((len < 12) && (offset < 2048)) { int len_minus_4 = len - 4; DCHECK_LT(len_minus_4, 8); /* Must fit in 3 bits */ - *op++ = COPY_1_BYTE_OFFSET | - ((len_minus_4) << 2) | + *op++ = COPY_1_BYTE_OFFSET + + ((len_minus_4) << 2) + ((offset >> 8) << 5); *op++ = offset & 0xff; } else { - *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2); + *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2); put_unaligned_le16(offset, op); op += 2; } diff --git a/snappy/csnappy_internal_userspace.h b/snappy/csnappy_internal_userspace.h index 4d06d86..eb0e1b3 100644 --- a/snappy/csnappy_internal_userspace.h +++ b/snappy/csnappy_internal_userspace.h @@ -131,7 +131,7 @@ Albert Lee #define __LITTLE_ENDIAN LITTLE_ENDIAN #define __BIG_ENDIAN BIG_ENDIAN -#elif defined(__FreeBSD__) || defined(__DragonFlyBSD__) || defined(__NetBSD__) +#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__) #include #define bswap_16(x) bswap16(x) diff --git a/srl_buffer.h b/srl_buffer.h index 1dc88da..7b7ab68 100644 --- a/srl_buffer.h +++ b/srl_buffer.h @@ -4,8 +4,11 @@ #include "assert.h" #include "srl_inline.h" +#include "srl_common.h" #include "srl_encoder.h" +#include "srl_buffer_types.h" + #ifdef MEMDEBUG # define BUFFER_GROWTH_FACTOR 1 #else @@ -16,22 +19,37 @@ * usable in one place per compilation unit. Drop "static" when necessary. * For now, potentially smaller code wins. */ - /* buffer operations */ -#define BUF_POS_OFS(enc) ((enc)->pos - (enc)->buf_start) -#define BUF_SPACE(enc) ((enc)->buf_end - (enc)->pos) -#define BUF_SIZE(enc) ((enc)->buf_end - (enc)->buf_start) -#define BUF_NEED_GROW(enc, minlen) ((size_t)BUF_SPACE(enc) <= minlen) -#define BUF_NEED_GROW_TOTAL(enc, minlen) ((size_t)BUF_SIZE(enc) <= minlen) +#define BUF_POS_OFS(buf) (((buf).pos) - ((buf).start)) +#define BUF_SPACE(buf) (((buf).end) - ((buf).pos)) +#define BUF_SIZE(buf) (((buf).end) - ((buf).start)) +#define BUF_NEED_GROW(buf, minlen) ((size_t)BUF_SPACE(buf) <= minlen) +#define BUF_NEED_GROW_TOTAL(buf, minlen) ((size_t)BUF_SIZE(buf) <= minlen) + + +/* body-position/size related operations */ +#define BODY_POS_OFS(buf) (((buf).pos) - ((buf).body_pos)) + +/* these are mostly for right between (de)serializing the header and the body */ +#define SRL_SET_BODY_POS(enc, pos_ptr) ((enc)->buf.body_pos = pos_ptr) +#define SRL_UPDATE_BODY_POS(enc) \ + STMT_START { \ + if (expect_false(SRL_ENC_HAVE_OPTION((enc), SRL_F_USE_PROTO_V1))) { \ + SRL_SET_BODY_POS(enc, (enc)->buf.start); \ + } else { \ + SRL_SET_BODY_POS(enc, (enc)->buf.pos-1); \ + } \ + } STMT_END + /* Internal debugging macros, used only in DEBUG mode */ #ifndef NDEBUG #define DEBUG_ASSERT_BUF_SPACE(enc, len) STMT_START { \ - if((BUF_SPACE(enc) < (ptrdiff_t)(len))) { \ + if((BUF_SPACE(enc->buf) < (ptrdiff_t)(len))) { \ warn("failed assertion check - pos: %ld [%p %p %p] %ld < %ld", \ - (long)BUF_POS_OFS(enc), (enc)->buf_start, (enc)->pos, (enc)->buf_end, (long)BUF_SPACE(enc),(long)(len)); \ + (long)BUF_POS_OFS(enc->buf), (enc)->buf.start, (enc)->buf.pos, (enc)->buf.end, (long)BUF_SPACE(enc->buf),(long)(len)); \ } \ - assert(BUF_SPACE(enc) >= (ptrdiff_t)(len)); \ + assert(BUF_SPACE(enc->buf) >= (ptrdiff_t)(len)); \ } STMT_END #else #define DEBUG_ASSERT_BUF_SPACE(enc, len) ((void)0) @@ -39,60 +57,110 @@ #ifndef NDEBUG #define DEBUG_ASSERT_BUF_SANE(enc) STMT_START { \ - if(!(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end))){\ + if(!(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end))){\ warn("failed sanity assertion check - pos: %ld [%p %p %p] %ld", \ - (long)BUF_POS_OFS(enc), (enc)->buf_start, (enc)->pos, (enc)->buf_end, (long)BUF_SPACE(enc)); \ + (long)BUF_POS_OFS(enc->buf), (enc)->buf.start, (enc)->buf.pos, (enc)->buf.end, (long)BUF_SPACE(enc->buf)); \ } \ - assert(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end));\ + assert(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end));\ } STMT_END #else -#define DEBUG_ASSERT_BUF_SANE(enc) assert(((enc)->buf_start <= (enc)->pos) && ((enc)->pos <= (enc)->buf_end)) +#define DEBUG_ASSERT_BUF_SANE(enc) assert(((enc)->buf.start <= (enc)->buf.pos) && ((enc)->buf.pos <= (enc)->buf.end)) #endif +/* Allocate a virgin buffer (but not the buffer struct) */ +SRL_STATIC_INLINE int +srl_buf_init_buffer(pTHX_ srl_buffer_t *buf, const STRLEN init_size) +{ + Newx(buf->start, init_size, char); + if (expect_false( buf->start == NULL )) + return 1; + buf->end = buf->start + init_size - 1; + buf->pos = buf->start; + buf->body_pos = buf->start; /* SRL_SET_BODY_POS(enc, enc->buf.start) equiv */ + return 0; +} + +/* Free a buffer (but not the buffer struct) */ +SRL_STATIC_INLINE void +srl_buf_free_buffer(pTHX_ srl_buffer_t *buf) +{ + Safefree(buf->start); +} + +/* Copy one buffer to another (shallowly!) */ +SRL_STATIC_INLINE void +srl_buf_copy_buffer(pTHX_ srl_buffer_t *src, srl_buffer_t *dest) +{ + Copy(src, dest, 1, srl_buffer_t); +} + +/* Swap two buffers */ +SRL_STATIC_INLINE void +srl_buf_swap_buffer(pTHX_ srl_buffer_t *buf1, srl_buffer_t *buf2) +{ + srl_buffer_t tmp; + Copy(buf1, &tmp, 1, srl_buffer_t); + Copy(buf2, buf1, 1, srl_buffer_t); + Copy(&tmp, buf2, 1, srl_buffer_t); +} + + SRL_STATIC_INLINE void srl_buf_grow_nocheck(pTHX_ srl_encoder_t *enc, size_t minlen) { - const size_t pos_ofs= BUF_POS_OFS(enc); /* have to store the offset of pos */ + const size_t pos_ofs= BUF_POS_OFS(enc->buf); /* have to store the offset of pos */ + const size_t body_ofs= enc->buf.body_pos - enc->buf.start; /* have to store the offset of the body */ #ifdef MEMDEBUG const size_t new_size = minlen; #else - const size_t cur_size = BUF_SIZE(enc); + const size_t cur_size = BUF_SIZE(enc->buf); const size_t grown_len = (size_t)(cur_size * BUFFER_GROWTH_FACTOR); const size_t new_size = 100 + (minlen > grown_len ? minlen : grown_len); #endif + DEBUG_ASSERT_BUF_SANE(enc); /* assert that Renew means GROWING the buffer */ - assert(enc->buf_start + new_size > enc->buf_end); - Renew(enc->buf_start, new_size, char); - if (enc->buf_start == NULL) + assert(enc->buf.start + new_size > enc->buf.end); + + Renew(enc->buf.start, new_size, char); + if (enc->buf.start == NULL) croak("Out of memory!"); - enc->buf_end = (char *)(enc->buf_start + new_size); - enc->pos= enc->buf_start + pos_ofs; - assert(enc->buf_end - enc->buf_start > (ptrdiff_t)0); + enc->buf.end = (char *)(enc->buf.start + new_size); + enc->buf.pos= enc->buf.start + pos_ofs; + SRL_SET_BODY_POS(enc, enc->buf.start + body_ofs); + + DEBUG_ASSERT_BUF_SANE(enc); + assert(enc->buf.end - enc->buf.start > (ptrdiff_t)0); + assert(enc->buf.pos - enc->buf.start >= (ptrdiff_t)0); + /* The following is checking against -1 because SRL_UPDATE_BODY_POS + * will actually set the body_pos to pos-1, where pos can be 0. + * This works out fine in the end, but is admittedly a bit shady. + * FIXME */ + assert(enc->buf.body_pos - enc->buf.start >= (ptrdiff_t)-1); } -#define BUF_SIZE_ASSERT(enc, minlen) \ - STMT_START { \ - DEBUG_ASSERT_BUF_SANE(enc); \ - if (BUF_NEED_GROW(enc, minlen)) \ - srl_buf_grow_nocheck(aTHX_ (enc), (BUF_SIZE(enc) + minlen)); \ - DEBUG_ASSERT_BUF_SANE(enc); \ +#define BUF_SIZE_ASSERT(enc, minlen) \ + STMT_START { \ + DEBUG_ASSERT_BUF_SANE(enc); \ + if (BUF_NEED_GROW(enc->buf, minlen)) \ + srl_buf_grow_nocheck(aTHX_ (enc), (BUF_SIZE(enc->buf) + minlen)); \ + DEBUG_ASSERT_BUF_SANE(enc); \ } STMT_END -#define BUF_SIZE_ASSERT_TOTAL(enc, minlen) \ - STMT_START { \ - DEBUG_ASSERT_BUF_SANE(enc); \ - if (BUF_NEED_GROW_TOTAL(enc, minlen)) \ - srl_buf_grow_nocheck(aTHX_ (enc), (minlen)); \ - DEBUG_ASSERT_BUF_SANE(enc); \ +#define BUF_SIZE_ASSERT_TOTAL(enc, minlen) \ + STMT_START { \ + DEBUG_ASSERT_BUF_SANE(enc); \ + if (BUF_NEED_GROW_TOTAL(enc->buf, minlen)) \ + srl_buf_grow_nocheck(aTHX_ (enc), (minlen)); \ + DEBUG_ASSERT_BUF_SANE(enc); \ } STMT_END SRL_STATIC_INLINE void srl_buf_cat_str_int(pTHX_ srl_encoder_t *enc, const char *str, size_t len) { BUF_SIZE_ASSERT(enc, len); - Copy(str, enc->pos, len, char); - enc->pos += len; + Copy(str, enc->buf.pos, len, char); + enc->buf.pos += len; DEBUG_ASSERT_BUF_SANE(enc); } #define srl_buf_cat_str(enc, str, len) srl_buf_cat_str_int(aTHX_ enc, str, len) @@ -104,8 +172,8 @@ srl_buf_cat_str_nocheck_int(pTHX_ srl_encoder_t *enc, const char *str, size_t le { DEBUG_ASSERT_BUF_SANE(enc); DEBUG_ASSERT_BUF_SPACE(enc, len); - Copy(str, enc->pos, len, char); - enc->pos += len; + Copy(str, enc->buf.pos, len, char); + enc->buf.pos += len; DEBUG_ASSERT_BUF_SANE(enc); } #define srl_buf_cat_str_nocheck(enc, str, len) srl_buf_cat_str_nocheck_int(aTHX_ enc, str, len) @@ -118,7 +186,7 @@ srl_buf_cat_char_int(pTHX_ srl_encoder_t *enc, const char c) DEBUG_ASSERT_BUF_SANE(enc); BUF_SIZE_ASSERT(enc, 1); DEBUG_ASSERT_BUF_SPACE(enc, 1); - *enc->pos++ = c; + *enc->buf.pos++ = c; DEBUG_ASSERT_BUF_SANE(enc); } #define srl_buf_cat_char(enc, c) srl_buf_cat_char_int(aTHX_ enc, c) @@ -128,7 +196,7 @@ srl_buf_cat_char_nocheck_int(pTHX_ srl_encoder_t *enc, const char c) { DEBUG_ASSERT_BUF_SANE(enc); DEBUG_ASSERT_BUF_SPACE(enc, 1); - *enc->pos++ = c; + *enc->buf.pos++ = c; DEBUG_ASSERT_BUF_SANE(enc); } #define srl_buf_cat_char_nocheck(enc, c) srl_buf_cat_char_nocheck_int(aTHX_ enc, c) @@ -140,13 +208,13 @@ SRL_STATIC_INLINE void srl_buf_cat_varint_nocheck(pTHX_ srl_encoder_t *enc, const char tag, UV n) { DEBUG_ASSERT_BUF_SANE(enc); DEBUG_ASSERT_BUF_SPACE(enc, (tag==0 ? 0 : 1) + SRL_MAX_VARINT_LENGTH); - if (tag) - *enc->pos++ = tag; - while (n >= 0x80) { /* while we are larger than 7 bits long */ - *enc->pos++ = (n & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */ - n = n >> 7; /* shift off the 7 least significant bits */ + if (expect_true( tag )) + *enc->buf.pos++ = tag; + while (n >= 0x80) { /* while we are larger than 7 bits long */ + *enc->buf.pos++ = (n & 0x7f) | 0x80; /* write out the least significant 7 bits, set the high bit */ + n = n >> 7; /* shift off the 7 least significant bits */ } - *enc->pos++ = n; /* encode the last 7 bits without the high bit being set */ + *enc->buf.pos++ = n; /* encode the last 7 bits without the high bit being set */ DEBUG_ASSERT_BUF_SANE(enc); } diff --git a/srl_buffer_types.h b/srl_buffer_types.h new file mode 100644 index 0000000..66ad25a --- /dev/null +++ b/srl_buffer_types.h @@ -0,0 +1,11 @@ +#ifndef SRL_BUFFER_TYPES_H_ +#define SRL_BUFFER_TYPES_H_ + +typedef struct { + char *start; /* ptr to "physical" start of output buffer */ + char *end; /* ptr to end of output buffer */ + char *pos; /* ptr to current position within output buffer */ + char *body_pos; /* ptr to start of body within output buffer for protocol V2 encoding */ +} srl_buffer_t; + +#endif diff --git a/srl_encoder.c b/srl_encoder.c index 24554ef..7c54964 100644 --- a/srl_encoder.c +++ b/srl_encoder.c @@ -82,6 +82,7 @@ extern "C" { #define DEBUGHACK 0 /* some static function declarations */ +SRL_STATIC_INLINE void srl_clear_seen_hashes(pTHX_ srl_encoder_t *enc); static void srl_dump_sv(pTHX_ srl_encoder_t *enc, SV *src); SRL_STATIC_INLINE void srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src); SRL_STATIC_INLINE void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8); @@ -91,9 +92,11 @@ SRL_STATIC_INLINE void srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcnt SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys); SRL_STATIC_INLINE void srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src); SRL_STATIC_INLINE void srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src); -SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *src); +SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement); +SRL_STATIC_INLINE SV *srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent); SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc); SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc); +SRL_STATIC_INLINE PTABLE_t *srl_init_freezeobj_svhash(srl_encoder_t *enc); SRL_STATIC_INLINE PTABLE_t *srl_init_weak_hash(srl_encoder_t *enc); SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc); @@ -113,6 +116,10 @@ SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc); ? srl_init_weak_hash(enc) \ : (enc)->weak_seenhash ) +#define SRL_GET_FREEZEOBJ_SVHASH(enc) ( (enc)->freezeobj_svhash == NULL \ + ? srl_init_freezeobj_svhash(enc) \ + : (enc)->freezeobj_svhash ) + #define CALL_SRL_DUMP_SV(enc, src) STMT_START { \ if (!(src)) { \ srl_buf_cat_char((enc), SRL_HDR_UNDEF); \ @@ -143,8 +150,6 @@ SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc); } \ } STMT_END - - /* This is fired when we exit the Perl pseudo-block. * It frees our encoder and all. Put encoder-level cleanup * logic here so that we can simply use croak/longjmp for @@ -165,23 +170,36 @@ srl_destructor_hook(pTHX_ void *p) } } -void -srl_clear_encoder(pTHX_ srl_encoder_t *enc) +SRL_STATIC_INLINE void +srl_clear_seen_hashes(pTHX_ srl_encoder_t *enc) { - if (!SRL_ENC_HAVE_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY)) { - warn("Sereal Encoder being cleared but in virgin state. That is unexpected."); - } - - enc->recursion_depth = 0; if (enc->ref_seenhash != NULL) PTABLE_clear(enc->ref_seenhash); + if (enc->freezeobj_svhash != NULL) + PTABLE_clear_dec(aTHX_ enc->freezeobj_svhash); if (enc->str_seenhash != NULL) PTABLE_clear(enc->str_seenhash); if (enc->weak_seenhash != NULL) PTABLE_clear(enc->weak_seenhash); if (enc->string_deduper_hv != NULL) hv_clear(enc->string_deduper_hv); - enc->pos = enc->buf_start; +} + +void +srl_clear_encoder(pTHX_ srl_encoder_t *enc) +{ + if (!SRL_ENC_HAVE_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY)) { + warn("Sereal Encoder being cleared but in virgin state. That is unexpected."); + } + + enc->recursion_depth = 0; + srl_clear_seen_hashes(aTHX_ enc); + + enc->buf.pos = enc->buf.start; + /* tmp_buf.start may be NULL for an unused tmp_buf, but so what? */ + enc->tmp_buf.pos = enc->tmp_buf.start; + + SRL_SET_BODY_POS(enc, enc->buf.start); SRL_ENC_RESET_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY); } @@ -189,16 +207,26 @@ srl_clear_encoder(pTHX_ srl_encoder_t *enc) void srl_destroy_encoder(pTHX_ srl_encoder_t *enc) { - Safefree(enc->buf_start); + srl_buf_free_buffer(aTHX_ &enc->buf); + + /* Free tmp buffer only if it was allocated at all. */ + if (enc->tmp_buf.start != NULL) + srl_buf_free_buffer(aTHX_ &enc->tmp_buf); + Safefree(enc->snappy_workmem); if (enc->ref_seenhash != NULL) PTABLE_free(enc->ref_seenhash); + if (enc->freezeobj_svhash != NULL) + PTABLE_free(enc->freezeobj_svhash); if (enc->str_seenhash != NULL) PTABLE_free(enc->str_seenhash); if (enc->weak_seenhash != NULL) PTABLE_free(enc->weak_seenhash); if (enc->string_deduper_hv != NULL) SvREFCNT_dec(enc->string_deduper_hv); + + SvREFCNT_dec(enc->sereal_string_sv); + Safefree(enc); } @@ -211,14 +239,16 @@ srl_empty_encoder_struct(pTHX) if (enc == NULL) croak("Out of memory"); - /* Init struct */ - Newx(enc->buf_start, INITIALIZATION_SIZE, char); - if (enc->buf_start == NULL) { + /* Init buffer struct */ + if (expect_false( srl_buf_init_buffer(aTHX_ &(enc->buf), INITIALIZATION_SIZE) != 0 )) { Safefree(enc); croak("Out of memory"); } - enc->buf_end = enc->buf_start + INITIALIZATION_SIZE - 1; - enc->pos = enc->buf_start; + + /* Set the tmp buffer struct's char buffer to NULL so we don't free + * something nasty if it's unused. */ + enc->tmp_buf.start = NULL; + enc->recursion_depth = 0; enc->max_recursion_depth = DEFAULT_MAX_RECUR_DEPTH; enc->operational_flags = 0; @@ -227,8 +257,10 @@ srl_empty_encoder_struct(pTHX) enc->weak_seenhash = NULL; enc->str_seenhash = NULL; enc->ref_seenhash = NULL; + enc->freezeobj_svhash = NULL; enc->snappy_workmem = NULL; enc->string_deduper_hv = NULL; + enc->sereal_string_sv = NULL; return enc; } @@ -246,12 +278,17 @@ srl_build_encoder_struct(pTHX_ HV *opt) /* load options */ if (opt != NULL) { int undef_unknown = 0; - int snappy = 0; + int snappy_nonincr = 0; /* SRL_F_SHARED_HASHKEYS on by default */ svp = hv_fetchs(opt, "no_shared_hashkeys", 0); if ( !svp || !SvTRUE(*svp) ) SRL_ENC_SET_OPTION(enc, SRL_F_SHARED_HASHKEYS); + /* Needs to be before the snappy options */ + svp = hv_fetchs(opt, "use_protocol_v1", 0); + if ( svp && SvTRUE(*svp) ) + SRL_ENC_SET_OPTION(enc, SRL_F_USE_PROTO_V1); + svp = hv_fetchs(opt, "croak_on_bless", 0); if ( svp && SvTRUE(*svp) ) SRL_ENC_SET_OPTION(enc, SRL_F_CROAK_ON_BLESS); @@ -260,15 +297,29 @@ srl_build_encoder_struct(pTHX_ HV *opt) if ( svp && SvTRUE(*svp) ) SRL_ENC_SET_OPTION(enc, SRL_F_NO_BLESS_OBJECTS); + svp = hv_fetchs(opt, "freeze_callbacks", 0); + if ( svp && SvTRUE(*svp) ) { + if (SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS)) + croak("The no_bless_objects and freeze_callback_support " + "options are mutually exclusive"); + SRL_ENC_SET_OPTION(enc, SRL_F_ENABLE_FREEZE_SUPPORT); + enc->sereal_string_sv = newSVpvs("Sereal"); + } + svp = hv_fetchs(opt, "snappy", 0); if ( svp && SvTRUE(*svp) ) { - snappy = 1; - SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY); + /* incremental is the new black in V2 */ + if (expect_true( !SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) )) + SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL); + else { + snappy_nonincr = 1; + SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY); + } } svp = hv_fetchs(opt, "snappy_incr", 0); if ( svp && SvTRUE(*svp) ) { - if (snappy) + if (snappy_nonincr) croak("'snappy' and 'snappy_incr' options are mutually exclusive"); SRL_ENC_SET_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL); } @@ -358,6 +409,13 @@ srl_init_weak_hash(srl_encoder_t *enc) return enc->weak_seenhash; } +SRL_STATIC_INLINE PTABLE_t * +srl_init_freezeobj_svhash(srl_encoder_t *enc) +{ + enc->freezeobj_svhash = PTABLE_new_size(3); + return enc->freezeobj_svhash; +} + SRL_STATIC_INLINE HV * srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc) { @@ -380,10 +438,10 @@ srl_init_snappy_workmem(pTHX_ srl_encoder_t *enc) void -srl_write_header(pTHX_ srl_encoder_t *enc) +srl_write_header(pTHX_ srl_encoder_t *enc, SV *user_header_src) { /* 4th to 8th bit are flags. Using 4th for snappy flag. FIXME needs to go in spec. */ - const U8 version_and_flags = SRL_PROTOCOL_VERSION + const U8 version_and_flags = (SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) ? 1 : SRL_PROTOCOL_VERSION) | ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY) ? SRL_PROTOCOL_ENCODING_SNAPPY @@ -398,7 +456,43 @@ srl_write_header(pTHX_ srl_encoder_t *enc) BUF_SIZE_ASSERT(enc, sizeof(SRL_MAGIC_STRING) + 1 + 1); srl_buf_cat_str_s_nocheck(enc, SRL_MAGIC_STRING); srl_buf_cat_char_nocheck(enc, version_and_flags); - srl_buf_cat_char_nocheck(enc, '\0'); /* variable header length (0 right now) */ + if (user_header_src == NULL) { + srl_buf_cat_char_nocheck(enc, '\0'); /* variable header length (0 right now) */ + } + else { + STRLEN user_data_len; + + if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_USE_PROTO_V1) )) + croak("Cannot serialize user header data in Sereal protocol V1 mode!"); + + /* Allocate tmp buffer for swapping if necessary, + * will be cleaned up automatically */ + if (enc->tmp_buf.start == NULL) + srl_buf_init_buffer(aTHX_ &enc->tmp_buf, INITIALIZATION_SIZE); + + /* Write document body (for header) into separate buffer */ + srl_buf_swap_buffer(aTHX_ &enc->tmp_buf, &enc->buf); + SRL_UPDATE_BODY_POS(enc); + srl_dump_sv(aTHX_ enc, user_header_src); + srl_fixup_weakrefs(aTHX_ enc); /* more bodies to follow */ + srl_clear_seen_hashes(aTHX_ enc); /* more bodies to follow */ + + /* Swap main buffer back in, encode header length&bitfield, copy user header data */ + user_data_len = BUF_POS_OFS(enc->buf); + srl_buf_swap_buffer(aTHX_ &enc->buf, &enc->tmp_buf); + + BUF_SIZE_ASSERT(enc, user_data_len + 1 + SRL_MAX_VARINT_LENGTH); /* +1 for bit field, +X for header len */ + + /* Encode header length */ + srl_buf_cat_varint_nocheck(aTHX_ enc, 0, (UV)(user_data_len + 1)); /* +1 for bit field */ + /* Encode bitfield */ + srl_buf_cat_char_nocheck(enc, '\1'); + /* Copy user header data */ + Copy(enc->tmp_buf.start, enc->buf.pos, user_data_len, char); + enc->buf.pos += user_data_len; + + enc->tmp_buf.pos = enc->tmp_buf.start; /* reset tmp buffer just to be clean */ + } } /* The following is to handle the fact that under normal build options @@ -439,18 +533,18 @@ srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src) if ( f == nv || nv != nv ) { BUF_SIZE_ASSERT(enc, 1 + sizeof(f)); /* heuristic: header + string + simple value */ srl_buf_cat_char_nocheck(enc,SRL_HDR_FLOAT); - Copy((char *)&f, enc->pos, sizeof(f), char); - enc->pos += sizeof(f); + Copy((char *)&f, enc->buf.pos, sizeof(f), char); + enc->buf.pos += sizeof(f); } else if (d == nv) { BUF_SIZE_ASSERT(enc, 1 + sizeof(d)); /* heuristic: header + string + simple value */ srl_buf_cat_char_nocheck(enc,SRL_HDR_DOUBLE); - Copy((char *)&d, enc->pos, sizeof(d), char); - enc->pos += sizeof(d); + Copy((char *)&d, enc->buf.pos, sizeof(d), char); + enc->buf.pos += sizeof(d); } else { BUF_SIZE_ASSERT(enc, 1 + sizeof(nv)); /* heuristic: header + string + simple value */ srl_buf_cat_char_nocheck(enc,SRL_HDR_LONG_DOUBLE); - Copy((char *)&nv, enc->pos, sizeof(nv), char); - enc->pos += sizeof(nv); + Copy((char *)&nv, enc->buf.pos, sizeof(nv), char); + enc->buf.pos += sizeof(nv); } } @@ -465,7 +559,8 @@ srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src) * we're just wasting some space. */ /* TODO optimize! */ - if (SvIOK_UV(src) || SvIV(src) >= 0) { /* FIXME find a way to express this without repeated SvIV/SvUV */ + /* FIXME find a way to express the condition without repeated SvIV/SvUV */ + if (expect_true( SvIOK_UV(src) || SvIV(src) >= 0 )) { const UV num = SvUV(src); /* FIXME is SvUV_nomg good enough because of the GET magic in dump_sv? SvUVX after having checked the flags? */ if (num < 16) { /* encodable as POS */ @@ -490,42 +585,112 @@ srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src) } } +/* Dumps the tag and class name of an object doing all necessary callbacks or + * exception-throwing. + * The provided SV must already have been identified as a Perl object + * using sv_isobject(). + * If the return value is not NULL, then it's the actual object content that + * needs to be serialized by the caller. */ +SRL_STATIC_INLINE SV * +srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent) +{ + assert(sv_isobject(src)); /* duplicate asserts are "free" */ + + /* Check for FREEZE support */ + if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_ENABLE_FREEZE_SUPPORT) )) { + HV *stash = SvSTASH(referent); + GV *method = NULL; + assert(stash != NULL); + method = gv_fetchmethod_autoload(stash, "FREEZE", 0); + + if (expect_false( method != NULL )) { + SV *replacement= NULL; + PTABLE_t *freezeobj_svhash = SRL_GET_FREEZEOBJ_SVHASH(enc); + if (SvREFCNT(referent)>1) { + replacement= PTABLE_fetch(freezeobj_svhash, referent); + } + if (!replacement) { + int count; + dSP; + ENTER; + SAVETMPS; + PUSHMARK(SP); + + EXTEND(SP, 2); + PUSHs(src); + PUSHs(enc->sereal_string_sv); /* not NULL if SRL_F_ENABLE_FREEZE_SUPPORT is set */ + replacement= (SV*)newAV(); + PTABLE_store(freezeobj_svhash, referent, replacement); + + PUTBACK; + count = call_sv((SV *)GvCV(method), G_ARRAY); + /* TODO explore method lookup caching */ + SPAGAIN; + + while ( count-- > 0) { + SV *tmp = POPs; + SvREFCNT_inc(tmp); + if (!av_store((AV*)replacement,count,tmp)) + croak("Failed to push value into array"); + } + + PUTBACK; + FREETMPS; + LEAVE; + } + return replacement; + } + } + return NULL; + +} /* Outputs a bless header and the class name (as some form of string or COPY). * Caller then has to output the actual reference payload. */ SRL_STATIC_INLINE void -srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *src) +srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement) { - const HV *stash = SvSTASH(src); - PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc); - const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash); - - if (oldoffset != 0) { - /* Issue COPY instead of literal class name string */ - srl_buf_cat_varint(aTHX_ enc, SRL_HDR_OBJECTV, (UV)oldoffset); - } - else { - const char *class_name = HvNAME_get(stash); - const size_t len = HvNAMELEN_get(stash); - - /* First save this new string (well, the HV * that it is represented by) into the string - * dedupe table. - * By saving the ptr to the HV, we only dedupe class names with class names, though - * this seems a small price to pay for not having to keep a full string table. - * At least, we can safely use the same PTABLE to store the ptrs to hashkeys since - * the set of pointers will never collide. - * /me bows to Yves for the delightfully evil hack. */ - srl_buf_cat_char(enc, SRL_HDR_OBJECT); - - /* remember current offset before advancing it */ - PTABLE_store(string_seenhash, (void *)stash, (void *)(enc->pos - enc->buf_start)); - - /* HvNAMEUTF8 not in older perls and it would be 0 for those anyway */ + /* Check that we actually want to support objects */ + if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_CROAK_ON_BLESS)) ) { + croak("Attempted to serialize blessed reference. Serializing objects " + "using Sereal::Encoder was explicitly disabled using the " + "'croak_on_bless' option."); + } else if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS) )) { + return; + } else { + const HV *stash = SvSTASH(referent); + PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc); + const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash); + + if (oldoffset != 0) { + /* Issue COPY instead of literal class name string */ + srl_buf_cat_varint(aTHX_ enc, + expect_false(replacement) ? SRL_HDR_OBJECTV_FREEZE : SRL_HDR_OBJECTV, + (UV)oldoffset); + } + else { + const char *class_name = HvNAME_get(stash); + const size_t len = HvNAMELEN_get(stash); + + /* First save this new string (well, the HV * that it is represented by) into the string + * dedupe table. + * By saving the ptr to the HV, we only dedupe class names with class names, though + * this seems a small price to pay for not having to keep a full string table. + * At least, we can safely use the same PTABLE to store the ptrs to hashkeys since + * the set of pointers will never collide. + * /me bows to Yves for the delightfully evil hack. */ + srl_buf_cat_char(enc, expect_false(replacement) ? SRL_HDR_OBJECT_FREEZE : SRL_HDR_OBJECT); + + /* remember current offset before advancing it */ + PTABLE_store(string_seenhash, (void *)stash, (void *)BODY_POS_OFS(enc->buf)); + + /* HvNAMEUTF8 not in older perls and it would be 0 for those anyway */ #if PERL_VERSION >= 16 - srl_dump_pv(aTHX_ enc, class_name, len, HvNAMEUTF8(stash)); + srl_dump_pv(aTHX_ enc, class_name, len, HvNAMEUTF8(stash)); #else - srl_dump_pv(aTHX_ enc, class_name, len, 0); + srl_dump_pv(aTHX_ enc, class_name, len, 0); #endif + } } } @@ -588,19 +753,20 @@ SRL_STATIC_INLINE void srl_reset_snappy_header_flag(srl_encoder_t *enc) { /* sizeof(const char *) includes a count of \0 */ - char *flags_and_version_byte = enc->buf_start + sizeof(SRL_MAGIC_STRING) - 1; + char *flags_and_version_byte = enc->buf.start + sizeof(SRL_MAGIC_STRING) - 1; /* disable snappy flag in header */ *flags_and_version_byte = SRL_PROTOCOL_ENCODING_RAW | (*flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK); } void -srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src) +srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src) { enc = srl_prepare_encoder(aTHX_ enc); - if (!SRL_ENC_HAVE_OPTION(enc, (SRL_F_COMPRESS_SNAPPY | SRL_F_COMPRESS_SNAPPY_INCREMENTAL))) { - srl_write_header(aTHX_ enc); + if (expect_true( !SRL_ENC_HAVE_OPTION(enc, (SRL_F_COMPRESS_SNAPPY | SRL_F_COMPRESS_SNAPPY_INCREMENTAL)) )) { + srl_write_header(aTHX_ enc, user_header_src); + SRL_UPDATE_BODY_POS(enc); srl_dump_sv(aTHX_ enc, src); srl_fixup_weakrefs(aTHX_ enc); } @@ -610,12 +776,13 @@ srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src) /* Alas, have to write entire packet first since the header length * will determine offsets. */ - srl_write_header(aTHX_ enc); - sereal_header_len = BUF_POS_OFS(enc); + srl_write_header(aTHX_ enc, user_header_src); + sereal_header_len = BUF_POS_OFS(enc->buf); + SRL_UPDATE_BODY_POS(enc); srl_dump_sv(aTHX_ enc, src); srl_fixup_weakrefs(aTHX_ enc); - assert(BUF_POS_OFS(enc) > sereal_header_len); - uncompressed_body_length = BUF_POS_OFS(enc) - sereal_header_len; + assert(BUF_POS_OFS(enc->buf) > sereal_header_len); + uncompressed_body_length = BUF_POS_OFS(enc->buf) - sereal_header_len; if (enc->snappy_threshold > 0 && uncompressed_body_length < (STRLEN)enc->snappy_threshold) @@ -624,9 +791,9 @@ srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src) srl_reset_snappy_header_flag(enc); } else { /* do snappy compression of body */ - char *old_buf; + srl_buffer_t old_buf; /* TODO can we use the enc->tmp_buf here to avoid allocations? */ char *varint_start= NULL; - char *varint_end; + char *varint_end= NULL; uint32_t dest_len; /* Get uncompressed payload and total packet output (after compression) lengths */ @@ -639,39 +806,41 @@ srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src) srl_init_snappy_workmem(aTHX_ enc); /* Back up old buffer and allocate new one with correct size */ - old_buf = enc->buf_start; - Newx(enc->buf_start, dest_len, char); - if (!enc->buf_start) { - enc->buf_start = old_buf; /* for cleanup */ - croak("Out of memory!"); - } - enc->pos = enc->buf_start; - enc->buf_end = enc->buf_start + dest_len; + srl_buf_copy_buffer(aTHX_ &enc->buf, &old_buf); + srl_buf_init_buffer(aTHX_ &enc->buf, dest_len); /* Copy Sereal header */ - Copy(old_buf, enc->pos, sereal_header_len, char); - enc->pos += sereal_header_len; + Copy(old_buf.start, enc->buf.pos, sereal_header_len, char); + enc->buf.pos += sereal_header_len; + SRL_UPDATE_BODY_POS(enc); /* will do the right thing wrt. protocol V1 / V2 */ /* Embed compressed packet length */ if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_COMPRESS_SNAPPY_INCREMENTAL ) ) { - varint_start= enc->pos; + varint_start= enc->buf.pos; srl_buf_cat_varint_nocheck(aTHX_ enc, 0, dest_len); - varint_end= enc->pos - 1; + varint_end= enc->buf.pos - 1; } - csnappy_compress(old_buf+sereal_header_len, (uint32_t)uncompressed_body_length, enc->pos, &dest_len, + csnappy_compress(old_buf.start + sereal_header_len, (uint32_t)uncompressed_body_length, enc->buf.pos, &dest_len, enc->snappy_workmem, CSNAPPY_WORKMEM_BYTES_POWER_OF_TWO); assert(dest_len != 0); - /* overwrite the max size varint with the real size of the compressed data */ - if (varint_start) - srl_update_varint_from_to(aTHX_ varint_start, varint_end, dest_len); - - Safefree(old_buf); - enc->pos += dest_len; - assert(enc->pos <= enc->buf_end); + /* If compression didn't help, swap back to old, uncompressed buffer */ + if (dest_len >= uncompressed_body_length) { + /* swap in old, uncompressed buffer */ + srl_buf_swap_buffer(aTHX_ &enc->buf, &old_buf); + /* disable snappy flag */ + srl_reset_snappy_header_flag(enc); + } + else { /* go ahead with Snappy and do final fixups */ + /* overwrite the max size varint with the real size of the compressed data */ + if (varint_start) + srl_update_varint_from_to(aTHX_ varint_start, varint_end, dest_len); + enc->buf.pos += dest_len; + } - /* TODO If compression didn't help, swap back to old, uncompressed buffer */ + srl_buf_free_buffer(aTHX_ &old_buf); + assert(enc->buf.pos <= enc->buf.end); } /* end of "actually do snappy compression" */ } /* end of "want snappy compression?" */ @@ -694,9 +863,9 @@ srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc) while ( NULL != (ent = PTABLE_iter_next(it)) ) { const ptrdiff_t offset = (ptrdiff_t)ent->value; if ( offset ) { - char *pos = enc->buf_start + offset; + char *pos = enc->buf.body_pos + offset; assert(*pos == SRL_HDR_WEAKEN); - if (DEBUGHACK) warn("setting %lu to PAD", (long unsigned int)offset); + if (DEBUGHACK) warn("setting byte at offset %lu to PAD", (long unsigned int)offset); *pos = SRL_HDR_PAD; } } @@ -800,7 +969,7 @@ srl_dump_av(pTHX_ srl_encoder_t *enc, AV *src, U32 refcount) BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + n); if (n < 16 && refcount == 1) { - enc->pos--; /* backup over previous REFN */ + enc->buf.pos--; /* backup over previous REFN */ srl_buf_cat_char_nocheck(enc, SRL_HDR_ARRAYREF + n); } else { /* header and num. elements */ @@ -875,7 +1044,7 @@ srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount) BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + 3*n); if (n < 16 && refcount == 1) { - enc->pos--; /* back up over the previous REFN */ + enc->buf.pos--; /* back up over the previous REFN */ srl_buf_cat_char_nocheck(enc, SRL_HDR_HASHREF + n); } else { srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_HASH, n); @@ -941,7 +1110,7 @@ srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount) * + 2*n = very conservative min size of n hashkeys if all COPY */ BUF_SIZE_ASSERT(enc, 2 + SRL_MAX_VARINT_LENGTH + 3*n); if (n < 16 && refcount == 1) { - enc->pos--; /* backup over the previous REFN */ + enc->buf.pos--; /* backup over the previous REFN */ srl_buf_cat_char_nocheck(enc, SRL_HDR_HASHREF + n); } else { srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_HASH, n); @@ -1006,7 +1175,7 @@ srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys) } else { /* remember current offset before advancing it */ - const ptrdiff_t newoffset = enc->pos - enc->buf_start; + const ptrdiff_t newoffset = BODY_POS_OFS(enc->buf); PTABLE_store(string_seenhash, (void *)str, (void *)newoffset); } } @@ -1026,7 +1195,7 @@ SRL_STATIC_INLINE void srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src) { STRLEN len; - const char const *str= SvPV(src, len); + const char * const str= SvPV(src, len); if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_DEDUPE_STRINGS) && len > 3 ) { HV *string_deduper_hv= SRL_GET_STR_DEDUPER_HV(enc); HE *dupe_offset_he= hv_fetch_ent(string_deduper_hv, src, 1, 0); @@ -1046,7 +1215,7 @@ srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src) return; } else { /* start tracking this string */ - sv_setuv(ofs_sv, (UV)BUF_POS_OFS(enc)); + sv_setuv(ofs_sv, (UV)BODY_POS_OFS(enc->buf)); } } } @@ -1064,10 +1233,12 @@ srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_ut } else { srl_buf_cat_varint_nocheck(aTHX_ enc, SRL_HDR_BINARY, src_len); } - Copy(src, enc->pos, src_len, char); - enc->pos += src_len; + Copy(src, enc->buf.pos, src_len, char); + enc->buf.pos += src_len; } + + /* Dumps generic SVs and delegates * to more specialized functions for RVs, etc. */ /* TODO decide when to use the IV, when to use the PV, and when @@ -1085,13 +1256,12 @@ srl_dump_sv(pTHX_ srl_encoder_t *enc, SV *src) MAGIC *mg; AV *backrefs; SV* refsv= NULL; + SV* replacement= NULL; UV weakref_ofs= 0; /* preserved between loops */ - int nobless; SSize_t ref_rewrite_pos= 0; /* preserved between loops - note SSize_t is a perl define */ assert(src); - nobless = SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS); - if (++enc->recursion_depth == enc->max_recursion_depth) { + if (expect_false( ++enc->recursion_depth == enc->max_recursion_depth )) { croak("Hit maximum recursion depth (%lu), aborting serialization", (unsigned long)enc->max_recursion_depth); } @@ -1115,7 +1285,7 @@ redo_dump: if (DEBUGHACK) warn("backreferences %p", src); } #endif - if ( mg || backrefs ) { + if (expect_false( mg || backrefs )) { PTABLE_t *weak_seenhash= SRL_GET_WEAK_SEENHASH(enc); PTABLE_ENTRY_t *pe= PTABLE_find(weak_seenhash, src); if (!pe) { @@ -1154,24 +1324,38 @@ redo_dump: /* we have seen it before, so we do not need to bless it again */ if (ref_rewrite_pos) { if (DEBUGHACK) warn("ref to %p as %lu", src, (long unsigned int)oldoffset); - enc->pos= enc->buf_start + ref_rewrite_pos; + enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos; srl_buf_cat_varint(aTHX_ enc, SRL_HDR_REFP, (UV)oldoffset); } else { if (DEBUGHACK) warn("alias to %p as %lu", src, (long unsigned int)oldoffset); srl_buf_cat_varint(aTHX_ enc, SRL_HDR_ALIAS, (UV)oldoffset); } - SRL_SET_FBIT(*(enc->buf_start + oldoffset)); + SRL_SET_FBIT(*(enc->buf.body_pos + oldoffset)); --enc->recursion_depth; return; } - if (DEBUGHACK) warn("storing %p as %lu", src, (long unsigned int)BUF_POS_OFS(enc)); - PTABLE_store(ref_seenhash, src, (void *)BUF_POS_OFS(enc)); + if (DEBUGHACK) warn("storing %p as %lu", src, (long unsigned int)BODY_POS_OFS(enc->buf)); + PTABLE_store(ref_seenhash, src, (void *)BODY_POS_OFS(enc->buf)); } } - if (weakref_ofs != 0) { + if (expect_false( weakref_ofs != 0 )) { sv_dump(src); croak("Corrupted weakref? weakref_ofs=0 (this should not happen)"); } + if (replacement) { + if (SvROK(replacement)) { + src= SvRV(replacement); + } else { + src= replacement; + } + replacement= NULL; + svt = SvTYPE(src); + /* plus one ensures that later on we get REFN/ARRAY and not ARRAYREF - This is horrible tho. needs to be revisited another day */ + refcount= SvREFCNT(src) + 1; + /* We could, but do not do the following:*/ + /* goto redo_dump; */ + /* Probably a "proper" solution would, but there are nits there that I dont want to chase right now. */ + } if (SvPOKp(src)) { #if defined(MODERN_REGEXP) && !defined(REGEXP_NO_LONGER_POK) /* Only need to enter here if we have rather modern regexps, but they're @@ -1212,27 +1396,24 @@ redo_dump: assert(referent); } #endif - if (SvWEAKREF(src)) { + if (expect_false( SvWEAKREF(src) )) { if (DEBUGHACK) warn("Is weakref %p", src); - weakref_ofs= BUF_POS_OFS(enc); + weakref_ofs= BODY_POS_OFS(enc->buf); srl_buf_cat_char(enc, SRL_HDR_WEAKEN); } - ref_rewrite_pos= BUF_POS_OFS(enc); - if (sv_isobject(src)) { - /* Check that we actually want to support objects */ - if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_CROAK_ON_BLESS)) ) { - croak("Attempted to serialize blessed reference. Serializing objects " - "using Sereal::Encoder was explicitly disabled using the " - "'croak_on_bless' option."); - } - /* FIXME reuse/ref/... should INCLUDE the bless stuff. */ + + ref_rewrite_pos= BODY_POS_OFS(enc->buf); + + if (expect_false( sv_isobject(src) )) { /* Write bless operator with class name */ - if (!nobless) - srl_dump_classname(aTHX_ enc, referent); + replacement= srl_get_frozen_object(aTHX_ enc, src, referent); + srl_dump_classname(aTHX_ enc, referent, replacement); /* 1 == have freeze call */ } + srl_buf_cat_char(enc, SRL_HDR_REFN); refsv= src; src= referent; + if (DEBUGHACK) warn("Going to redo %p", src); goto redo_dump; } @@ -1272,7 +1453,7 @@ redo_dump: * want to serialize around for REFP and ALIAS output */ \ PTABLE_t *ref_seenhash= SRL_GET_REF_SEENHASH(enc); \ PTABLE_delete(ref_seenhash, src); \ - enc->pos= enc->buf_start + ref_rewrite_pos; \ + enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos; \ } \ srl_buf_cat_char((enc), SRL_HDR_UNDEF); \ } \ @@ -1300,7 +1481,7 @@ redo_dump: * want to serialize around for REFP and ALIAS output */ \ PTABLE_t *ref_seenhash= SRL_GET_REF_SEENHASH(enc); \ PTABLE_delete(ref_seenhash, src); \ - enc->pos= enc->buf_start + ref_rewrite_pos; \ + enc->buf.pos= enc->buf.body_pos + ref_rewrite_pos; \ str = SvPV((refsv), len); \ } else \ str = SvPV((src), len); \ diff --git a/srl_encoder.h b/srl_encoder.h index 64db2bf..9fde69a 100644 --- a/srl_encoder.h +++ b/srl_encoder.h @@ -11,24 +11,30 @@ # define INITIALIZATION_SIZE 64 #endif +#include "srl_buffer_types.h" + typedef struct PTABLE * ptable_ptr; typedef struct { - char *buf_start; /* ptr to "physical" start of output buffer */ - char *buf_end; /* ptr to end of output buffer */ - char *pos; /* ptr to current position within output buffer */ + srl_buffer_t buf; + srl_buffer_t tmp_buf; /* temporary buffer for swapping */ - U32 operational_flags; /* flags that pertain to one encode run (rather than being options): See SRL_OF_* defines */ - U32 flags; /* flag-like options: See SRL_F_* defines */ - UV max_recursion_depth; /* Configurable limit on the number of recursive calls we're willing to make */ + U32 operational_flags; /* flags that pertain to one encode run (rather than being options): See SRL_OF_* defines */ + U32 flags; /* flag-like options: See SRL_F_* defines */ + UV max_recursion_depth; /* Configurable limit on the number of recursive calls we're willing to make */ - UV recursion_depth; /* current Perl-ref recursion depth */ + UV recursion_depth; /* current Perl-ref recursion depth */ ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */ ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */ ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */ + ptable_ptr freezeobj_svhash; /* ptr table for tracking objects and their frozen replacments via FREEZE */ HV *string_deduper_hv; /* track strings we have seen before, by content */ - void *snappy_workmem; /* lazily allocated if and only if using Snappy */ - IV snappy_threshold; /* do not compress things smaller than this even if Snappy enabled */ + void *snappy_workmem; /* lazily allocated if and only if using Snappy */ + IV snappy_threshold; /* do not compress things smaller than this even if Snappy enabled */ + + /*HV *freeze_cb_cache;*/ /* cache of callbacks for FREEZE methods: classname => CV*. + * only used if SRL_F_ENABLE_FREEZE_SUPPORT is set. */ + SV *sereal_string_sv; /* SV that says "Sereal" for FREEZE support */ } srl_encoder_t; /* constructor from options */ @@ -42,9 +48,9 @@ void srl_clear_encoder(pTHX_ srl_encoder_t *enc); void srl_destroy_encoder(pTHX_ srl_encoder_t *enc); /* Write Sereal packet header to output buffer */ -void srl_write_header(pTHX_ srl_encoder_t *enc); +void srl_write_header(pTHX_ srl_encoder_t *enc, SV *user_header_src); /* Start dumping a top-level SV */ -void srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src); +void srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src); /* define option bits in srl_encoder_t's flags member */ @@ -95,6 +101,12 @@ void srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src); * Corresponds to the 'no_bless_objects' flag found in the Decoder. */ #define SRL_F_NO_BLESS_OBJECTS 0x01000UL +/* If set in flags, then we serialize using Sereal protocol version 1. */ +#define SRL_F_USE_PROTO_V1 0x02000UL + +/* If set in flags, then support calling FREEZE method on objects. */ +#define SRL_F_ENABLE_FREEZE_SUPPORT 0x04000UL + /* Set while the encoder is in active use / dirty */ #define SRL_OF_ENCODER_DIRTY 1UL diff --git a/srl_protocol.h b/srl_protocol.h index 16af658..5b302b8 100644 --- a/srl_protocol.h +++ b/srl_protocol.h @@ -57,17 +57,17 @@ COPY | "/" | 47 | 0x2f | 0b00101111 | - copy of item defined at offset WEAKEN | "0" | 48 | 0x30 | 0b00110000 | - Weaken the following reference REGEXP | "1" | 49 | 0x31 | 0b00110001 | - RESERVED_0 | "2" | 50 | 0x32 | 0b00110010 | reserved - RESERVED_1 | "3" | 51 | 0x33 | 0b00110011 | - RESERVED_2 | "4" | 52 | 0x34 | 0b00110100 | - RESERVED_3 | "5" | 53 | 0x35 | 0b00110101 | - RESERVED_4 | "6" | 54 | 0x36 | 0b00110110 | - RESERVED_5 | "7" | 55 | 0x37 | 0b00110111 | - RESERVED_6 | "8" | 56 | 0x38 | 0b00111000 | - RESERVED_7 | "9" | 57 | 0x39 | 0b00111001 | reserved + OBJECT_FREEZE | "2" | 50 | 0x32 | 0b00110010 | - class, object-item. Need to call "THAW" method on class after decoding + OBJECTV_FREEZE | "3" | 51 | 0x33 | 0b00110011 | - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT) + RESERVED_0 | "4" | 52 | 0x34 | 0b00110100 | reserved + RESERVED_1 | "5" | 53 | 0x35 | 0b00110101 | + RESERVED_2 | "6" | 54 | 0x36 | 0b00110110 | + RESERVED_3 | "7" | 55 | 0x37 | 0b00110111 | + RESERVED_4 | "8" | 56 | 0x38 | 0b00111000 | + RESERVED_5 | "9" | 57 | 0x39 | 0b00111001 | reserved FALSE | ":" | 58 | 0x3a | 0b00111010 | false (PL_sv_no) TRUE | ";" | 59 | 0x3b | 0b00111011 | true (PL_sv_yes) - MANY | "<" | 60 | 0x3c | 0b00111100 | - repeated tag (not done yet, will be implemented in version 2) + MANY | "<" | 60 | 0x3c | 0b00111100 | - repeated tag (not done yet, will be implemented in version 3) PACKET_START | "=" | 61 | 0x3d | 0b00111101 | (first byte of magic string in header) EXTEND | ">" | 62 | 0x3e | 0b00111110 | - for additional tags PAD | "?" | 63 | 0x3f | 0b00111111 | (ignored tag, skip to next byte) @@ -144,7 +144,7 @@ #define SRL_MAGIC_STRLEN 4 /* Length of SRL_MAGIC_STRING */ #define SRL_MAGIC_STRING_LILIPUTIAN 0x6c72733d /* SRL_MAGIC_STRING as a little endian integer */ -#define SRL_PROTOCOL_VERSION ( 1 ) /* this is the first. for some reason we did not use 0 */ +#define SRL_PROTOCOL_VERSION ( 2 ) #define SRL_PROTOCOL_VERSION_BITS ( 4 ) /* how many bits we use for the version, the rest go to the encoding */ #define SRL_PROTOCOL_VERSION_MASK ( ( 1 << SRL_PROTOCOL_VERSION_BITS ) - 1 ) @@ -153,7 +153,9 @@ #define SRL_PROTOCOL_ENCODING_SNAPPY ( 1 << SRL_PROTOCOL_VERSION_BITS ) #define SRL_PROTOCOL_ENCODING_SNAPPY_INCREMENTAL ( 2 << SRL_PROTOCOL_VERSION_BITS ) - +/* Bits in the header bitfield */ +#define SRL_PROTOCOL_HDR_USER_DATA ( 1 ) +#define SRL_PROTOCOL_HDR_CONTINUE ( 8 ) /* TODO Describe in spec - not urgent since not meaningful yet */ /* Useful constants */ /* See also range constants below for the header byte */ @@ -193,16 +195,19 @@ #define SRL_HDR_WEAKEN ((char)48) /* - Weaken the following reference */ #define SRL_HDR_REGEXP ((char)49) /* */ +#define SRL_HDR_OBJECT_FREEZE ((char)50) /* - class, object-item. Need to call "THAW" method on class after decoding */ +#define SRL_HDR_OBJECTV_FREEZE ((char)51) /* - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT) */ + /* Note: Can do reserved check with a range now, but as we start using * them, might have to explicit == check later. */ -#define SRL_HDR_RESERVED ((char)50) /* reserved */ -#define SRL_HDR_RESERVED_LOW ((char)50) +#define SRL_HDR_RESERVED ((char)52) /* reserved */ +#define SRL_HDR_RESERVED_LOW ((char)52) #define SRL_HDR_RESERVED_HIGH ((char)57) #define SRL_HDR_FALSE ((char)58) /* false (PL_sv_no) */ #define SRL_HDR_TRUE ((char)59) /* true (PL_sv_yes) */ -#define SRL_HDR_MANY ((char)60) /* - repeated tag (not done yet, will be implemented in version 2) */ +#define SRL_HDR_MANY ((char)60) /* - repeated tag (not done yet, will be implemented in version 3) */ #define SRL_HDR_PACKET_START ((char)61) /* (first byte of magic string in header) */ diff --git a/t/010_desperate.t b/t/010_desperate.t index 984b914..5512389 100644 --- a/t/010_desperate.t +++ b/t/010_desperate.t @@ -29,6 +29,7 @@ done_testing(); sub run_tests { my ($extra_name, $opt_hash) = @_; + setup_tests(2); foreach my $bt (@BasicTests) { my (undef, $exp, $name) = @$bt; @@ -36,7 +37,7 @@ sub run_tests { $name="unnamed" if not defined $name; #next unless $name=~/PAD/; - $exp = "$Header$exp"; + $exp = Header(). $exp; my $enc = Sereal::Encoder->new($opt_hash ? $opt_hash : ()); my $out; eval{ diff --git a/t/100_roundtrip.t b/t/100_roundtrip.t index c871e73..2889ce9 100644 --- a/t/100_roundtrip.t +++ b/t/100_roundtrip.t @@ -29,7 +29,7 @@ if (not $ok) { plan skip_all => 'Did not find right version of decoder'; } else { - run_roundtrip_tests(); + run_roundtrip_tests(2); # 2 is "run for proto version 2 only" } diff --git a/t/101_roundtrip_v1.t b/t/101_roundtrip_v1.t new file mode 100644 index 0000000..3c065f1 --- /dev/null +++ b/t/101_roundtrip_v1.t @@ -0,0 +1,38 @@ +#!perl +use strict; +use warnings; +use Sereal::Encoder; +use Data::Dumper; +use File::Spec; + +# These tests use an installed Decoder (or respectively Encoder) to do +# round-trip testing. There are two strategies, both with drawbacks: +# - Test::More's is_deeply is waaaay too lenient to catch all the +# subtleties that Sereal is supposed to encode. +# - Serialize - Deserialize - Serialize, then do a string compare. +# This won't catch if the first serialization has bogus output +# but the subsequent de- & serialization work for the already +# bogus output. +# These tests can't replace carefully crafted manual tests, I fear. + +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); +use Test::More; + +my $ok = have_encoder_and_decoder(); +if (not $ok) { + plan skip_all => 'Did not find right version of decoder'; +} +else { + run_roundtrip_tests(1); # 1 is "run for proto-version 1 only" +} + + +pass(); +done_testing(); + diff --git a/t/120_hdr_data.t b/t/120_hdr_data.t new file mode 100644 index 0000000..809285d --- /dev/null +++ b/t/120_hdr_data.t @@ -0,0 +1,41 @@ +#!perl +use strict; +use warnings; +use Sereal::Encoder qw(:all); +use File::Spec; +use Scalar::Util qw( blessed ); +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); +use Test::More; + +my $ref = Header(2, chr(0b0000_1100)) . chr(0b0001_0000); # -16 in body, 12 in header +is(encode_sereal_with_header_data(-16, 12), $ref, "Encode 12 in header, -16 in body"); +is(Sereal::Encoder->new->encode(-16, 12), $ref, "OO: Encode 12 in header, -16 in body"); + +my $ok = have_encoder_and_decoder(); +if (not $ok) { + SKIP: { skip 'Did not find right version of decoder' => 1 } +} +else { + my $dec = Sereal::Decoder->new; + my $encoded = encode_sereal_with_header_data(-16, 12); + my $decoded = $dec->decode($encoded); + is($decoded, -16, "-16 decoded correctly"); + $decoded = $dec->decode_only_header($encoded); + is($decoded, 12, "12 decoded correctly"); + + my $munged = "X" . $encoded; + $decoded = $dec->decode_with_offset($munged, 1); + is($decoded, -16, "-16 decoded correctly (offset)"); + $decoded = $dec->decode_only_header_with_offset($munged, 1); + is($decoded, 12, "12 decoded correctly (offset)"); +} + +pass("Alive at end"); +done_testing(); + diff --git a/t/130_freezethaw.t b/t/130_freezethaw.t new file mode 100644 index 0000000..bc88d5a --- /dev/null +++ b/t/130_freezethaw.t @@ -0,0 +1,132 @@ +#!perl +use strict; +use warnings; +# most be loaded before Sereal::TestSet +use Sereal::Encoder qw(encode_sereal); +use Sereal::Encoder::Constants qw(:all); +use File::Spec; +use Test::More; +use Data::Dumper; + +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); + +my $thaw_called = 0; +my $freeze_called = 0; + +package Foo; +sub new { + my $class = shift; + return bless({bar => 1, @_} => $class); +} + +sub FREEZE { + my ($self, $serializer) = @_; + $freeze_called = $serializer eq 'Sereal' ? 1 : 0; + return "frozen object", 12, [2]; +} + +sub THAW { + my ($class, $serializer, @data) = @_; + $thaw_called = $serializer eq 'Sereal' ? 1 : 0; + Test::More::is_deeply(\@data, ["frozen object", 12, [2]], "Array of frozen values roundtrips"); + + return Foo->new(); +} + +package Bar; +sub new { + my $class = shift; + return bless({bar => 1, @_} => $class); +} + +sub FREEZE { + my ($self, $serializer) = @_; + return "frozen Bar"; +} + +package main; + +my $enc = Sereal::Encoder->new({freeze_callbacks => 1}); +my $srl = $enc->encode(Foo->new()); +ok($freeze_called, "FREEZE was invoked"); + + +my $run_decoder_tests = have_encoder_and_decoder(); +if (not $run_decoder_tests) { + done_testing(); + exit; +} + + +# Simple round-trip test +my $dec = Sereal::Decoder->new; +my $obj = $dec->decode($srl); +ok(defined($obj)); +isa_ok($obj, "Foo"); +is(eval{$obj->{bar}}, 1) or diag Dumper($obj); + +# Test referential integrity +my $foo = Foo->new; +my $data = [$foo, $foo]; +$srl = $enc->encode($data); +ok($srl =~ /frozen object/); + +my $out = $dec->decode($srl); +is_deeply($out, $data, "Roundtrip works"); + +cmp_ok($out->[0], "eq", $out->[1], + "Referential integrity: multiple RVs do not turn into clones") + or diag(Dumper($data,$out)); + +my $barobj = Bar->new; +$srl = $enc->encode($barobj); +ok(not(eval {$dec->decode($srl); 1}), "Decoding without THAW barfs"); + + +# Multiple-object-same-class test from Christian Hansen + +{ + package MyObject; + + sub from_num { + my ($class, $num) = @_; + return bless { num => $num }, $class; + } + + sub num { + my ($self) = @_; + return $self->{num}; + } + + sub FREEZE { + return $_[0]->num; + } + + sub THAW { + my ($class, undef, $num) = @_; + return $class->from_num($num); + } +} + +my @objects = map { MyObject->from_num($_) } (10, 20, 30); +my $encoded = encode_sereal([ @objects ], { freeze_callbacks => 1 }); +my $decoded = Sereal::Decoder::decode_sereal($encoded); + +isa_ok($decoded, 'ARRAY'); +is(scalar @$decoded, 3, 'array has three elements'); +isa_ok($decoded->[0], 'MyObject', 'first element'); +isa_ok($decoded->[1], 'MyObject', 'second element'); +isa_ok($decoded->[2], 'MyObject', 'third element'); + +is($decoded->[0]->num, 10, 'first MyObject->num'); +is($decoded->[1]->num, 20, 'second MyObject->num'); +is($decoded->[2]->num, 30, 'third MyObject->num'); + + +done_testing(); diff --git a/t/300_fail.t b/t/300_fail.t index 7565d88..cd657e2 100644 --- a/t/300_fail.t +++ b/t/300_fail.t @@ -24,14 +24,14 @@ SCOPE: { croak_on_bless => 1, }); - is($e->encode(1), $Header.integer(1), "Encoder works before exception"); + is($e->encode(1), Header().integer(1), "Encoder works before exception"); $ok = eval{$out = $e->encode(bless({}, "Foo")); 1}; $err = $@ || 'Zombie error'; ok(!$ok, "Object throws exception"); ok($err =~ /object/i, 'Exception refers to object'); - is($e->encode(1), $Header.integer(1), "Encoder works after exception"); + is($e->encode(1), Header().integer(1), "Encoder works after exception"); $ok = eval {$out = $e->encode({}); 1}; ok($ok, "Non-blessed hash does not throw exception"); @@ -47,7 +47,7 @@ SCOPE: { $ok = eval {$out = $e->encode(sub{}); 1}; $err = $@ || 'Zombie error'; ok($ok, "undef_unknown makes CODE encoding not fail"); - is($out, $Header . chr(SRL_HDR_UNDEF), "output is undef") + is($out, Header() . chr(SRL_HDR_UNDEF), "output is undef") or do { hobodecode($out) if $ENV{DEBUG_SEREAL}; } diff --git a/t/lib/Sereal/TestSet.pm b/t/lib/Sereal/TestSet.pm index ebd9017..ef2e90a 100644 --- a/t/lib/Sereal/TestSet.pm +++ b/t/lib/Sereal/TestSet.pm @@ -34,7 +34,8 @@ BEGIN { use Exporter; our @ISA = qw(Exporter); our @EXPORT_OK = qw( - $Header @BasicTests $Class $ConstClass + @BasicTests $Class $ConstClass + Header FBIT hobodecode integer short_string varint array array_fbit @@ -43,6 +44,7 @@ our @EXPORT_OK = qw( run_roundtrip_tests write_test_files $use_objectv + setup_tests ); our %EXPORT_TAGS = (all => \@EXPORT_OK); @@ -136,331 +138,382 @@ sub varint { return $out; } -our $Header = SRL_MAGIC_STRING . chr(SRL_PROTOCOL_VERSION) . chr(0); - -my $ary_ref_for_repeating = [5,6]; -my $scalar_ref_for_repeating = \9; - -my $weak_thing; $weak_thing = [\$weak_thing, 1]; weaken($weak_thing->[0]); - -my $unicode1= "Ba\xDF Ba\xDF"; my $unicode2= "\x{168}nix! \x{263a}"; utf8::upgrade($unicode1); utf8::upgrade($unicode2); - - -our @BasicTests = ( - # warning: this hardcodes the POS/NEG headers - [-16, chr(0b0001_0000), "encode -16"], - [-1, chr(0b0001_1111), "encode -1"], - [0, chr(0b0000_0000), "encode 0"], - [1, chr(0b0000_0001), "encode 1"], - [15, chr(0b0000_1111), "encode 15"], - [undef, chr(SRL_HDR_UNDEF), "encode undef"], - ["", short_string(""), "encode empty string"], - ["1", short_string("1"), "encode string '1'"], - ["91a", short_string("91a"), "encode string '91a'"], - ["abc" x 1000, chr(SRL_HDR_BINARY).varint(3000).("abc" x 1000), "long ASCII string"], - [\1, chr(SRL_HDR_REFN).chr(0b0000_0001), "scalar ref to int"], - [[], array(), "empty array ref"], - [[1,2,3], array(chr(0b0000_0001), chr(0b0000_0010), chr(0b0000_0011)), "array ref"], - [1000, chr(SRL_HDR_VARINT).varint(1000), "large int"], - [ [1..1000], - array( - (map chr, (1 .. SRL_POS_MAX_SIZE)), - (map chr(SRL_HDR_VARINT) . varint($_), ((SRL_POS_MAX_SIZE+1) .. 1000)) - ), - "array ref with pos and varints" - ], - - [{}, hash(), "empty hash ref"], - [{foo => "baaaaar"}, hash(short_string("foo"),short_string("baaaaar")), "simple hash ref"], - [ - [qw(foooo foooo foooo)], - sub { - my $opt = shift; - if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) { - my $d = array_head(3); - my $pos = length($Header) + length($d); - my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; - $d .= short_string("foooo") . chr($tag) . varint($pos) - . chr($tag) . varint($pos); - return $d; - } - else { - return array(short_string("foooo"),short_string("foooo"), short_string("foooo")); - } - }, - "ary ref with repeated string" - ], - [ - [{foooo => "barrr"}, {barrr => "foooo"}], - array(hash(short_string("foooo"), short_string("barrr")), - hash(short_string("barrr"), short_string("foooo"))), - "ary ref of hash refs without repeated strings" - ], - [ - [{foooo => "foooo"}, {foooo2 => "foooo"}], - sub { - my $opt = shift; - if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) { - my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; - my $d = array_head(2) . hash_head(2) . short_string("foooo"); - my $pos = length($Header) + length($d); - $d .= short_string("foooo") . hash_head(2) - . short_string("foooo2") - . chr($tag) . varint($pos); - return $d; - } - else { - return array(hash(short_string("foooo"), short_string("foooo")), - hash(short_string("foooo2"), short_string("foooo"))), - } - }, - "ary ref of hash refs with repeated strings" - ], - [$scalar_ref_for_repeating, chr(SRL_HDR_REFN).chr(0b0000_1001), "scalar ref to constant"], - [[$scalar_ref_for_repeating, $scalar_ref_for_repeating], - do { - my $content = array_head(2); - $content .= chr(SRL_HDR_REFN); - my $pos = length($Header) + length($content); - $content .= chr(0b1000_1001) - .chr(SRL_HDR_REFP) - .varint($pos) - ; - $content - }, "repeated substructure (REFP): scalar ref"], - [[$ary_ref_for_repeating, $ary_ref_for_repeating], - do { - my $content = array_head(2); - my $pos = length($Header) + length($content) + 1; - $content .= array_fbit(chr(0b0000_0101), chr(0b0000_0110)) - .chr(SRL_HDR_REFP) - .varint($pos) - ; - $content - }, "repeated substructure (REFP): array"], - [[\$ary_ref_for_repeating, [1, $ary_ref_for_repeating]], - do { - my $content = array_head(2) . chr(SRL_HDR_REFN); - my $pos = length($Header) + length($content) + 1; - $content .= array_fbit( - chr(0b0000_0101), - chr(0b0000_0110) - ) - . array( - chr(0b0000_0001), - chr(SRL_HDR_REFP) . varint($pos) - ) - ; - $content - }, "repeated substructure (REFP): asymmetric"], - [ - $weak_thing, - chr(SRL_HDR_REFN) . chr(SRL_HDR_ARRAY + FBIT) . varint(2) - .chr(SRL_HDR_PAD) - .chr(SRL_HDR_REFN) - .chr(SRL_HDR_REFP) - .varint(length($Header)+1) - .chr(0b0000_0001) - , - "weak thing copy (requires PAD)" - ], - [ - \$weak_thing, - chr(SRL_HDR_REFN) - . chr(SRL_HDR_REFN + FBIT) - . chr(SRL_HDR_ARRAY) . varint(2) - .chr(SRL_HDR_WEAKEN) . chr(SRL_HDR_REFP) . varint(length($Header)+1) - .chr(0b0000_0001) - , - "weak thing ref" - ], - sub { \@_ } ->( - $weak_thing, - chr(SRL_HDR_REFN + FBIT) - .chr(SRL_HDR_ARRAY).varint(2) - .chr(SRL_HDR_WEAKEN).chr(SRL_HDR_REFP).varint(length($Header)) - .chr(0b0000_0001) - , - "weak thing (aliased root)" - ), - [ - do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; \@array }, - do { - my $content= array_head(2); - my $pos= length($content); - $content - . chr(SRL_HDR_REFN + FBIT) - . chr(SRL_HDR_REFP + FBIT) - . varint(length($Header) + $pos ) - . chr(SRL_HDR_ALIAS) - . varint(length($Header) + $pos + 1) - }, - "scalar cross" - ], - [ - do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; weaken($array[1]); weaken($array[0]); \@array }, - do { - my $content= array_head(2); - my $pos= length($content); - $content - . chr(SRL_HDR_WEAKEN + FBIT) - . chr(SRL_HDR_REFN) - . chr(SRL_HDR_WEAKEN + FBIT) - . chr(SRL_HDR_REFP) - . varint(length($Header)+$pos) - . chr(SRL_HDR_ALIAS) - . varint(length($Header)+$pos+2) - }, - "weak scalar cross" - ], - [ - bless([],"foo"), - dump_bless(array(), "foo"), - "bless [], 'foo' (2)" - ], - [ - do { my $qr= bless qr/foo/ix,"bar"; [ $qr, $qr ] }, - do { - my $content= array_head(2); - my $pos= length($content); - join("", $content, - chr(SRL_HDR_OBJECT), - short_string("bar"), - chr(SRL_HDR_REFN), - chr(SRL_HDR_REGEXP + FBIT), - short_string("foo"), - short_string("ix"), - chr(SRL_HDR_REFP), - varint(length($Header) + $pos + 6 ), - ) - }, - "blessed regexp with reuse" - ], - [ - do { my $o1=bless [], "foo"; my $o2=bless [], "foo"; [ $o1, $o2, $o1, $o2 ] }, - do { - my $content= array_head(4). chr(SRL_HDR_OBJECT); - my $pos= length($content); - join("",$content, - short_string("foo"), - chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT),varint(0), - chr( SRL_HDR_OBJECT + $use_objectv), - $use_objectv ? () : chr(SRL_HDR_COPY), varint(length($Header) + $pos), - chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT), varint(0), - chr(SRL_HDR_REFP),varint(length($Header) + $pos + 5), - chr(SRL_HDR_REFP),varint(length($Header) + $pos + 10), - ) - }, - "blessed arrays with reuse" - ], - [ - [bless([], "foo"), bless([], "foo")], - do { - my $content = array_head(2) . chr(SRL_HDR_OBJECT); - my $pos = length($Header) + length($content); - $content .= short_string("foo") - . array() - . dump_bless( array(), \$pos ) - ; - $content - }, - "reused classname empty array" - ], - [ - bless([bless {}, "foo"], "foo"), - do { - my $content = chr(SRL_HDR_OBJECT); - my $pos = length($Header) + length($content); - $content .= short_string("foo") - . array_head(1) - . dump_bless(hash(), \$pos); - ; - $content - }, - "wrapped objects" - ], - [ - qr/foo/, - dump_bless( - chr(SRL_HDR_REFN) - .chr(SRL_HDR_REGEXP) - .short_string("foo") - .short_string(""), - "Regexp" - ), - "qr/foo/" - ], - [ - qr/(?i-xsm:foo)/, - dump_bless( - chr(SRL_HDR_REFN) - .chr(SRL_HDR_REGEXP) - .short_string("(?i-xsm:foo)") - .short_string(""), - "Regexp" - ), - "qr/(?i-xsm:foo)/" - ], - [ - qr/foo/i, - dump_bless( +our $PROTO_VERSION; + +sub Header { + my $proto_version = shift || $PROTO_VERSION; + my $user_data_blob = shift; + my $hdr_base = SRL_MAGIC_STRING . chr($proto_version||SRL_PROTOCOL_VERSION); + if (defined $user_data_blob) { + return $hdr_base . varint(1 + length($user_data_blob)) . chr(1) . $user_data_blob; + } + else { + return $hdr_base . chr(0); + } +} + +sub offset { + my ($str)= @_; + Carp::confess("no protoversion") if !defined $PROTO_VERSION; + if ($PROTO_VERSION >= 2) { + return length($str)+1; + } else { + return length($str) + length Header($PROTO_VERSION); + } +} + +sub offseti { + my ( $i )= @_; + if ($PROTO_VERSION >= 2) { + return $i + 1; + } else { + return $i + length Header($PROTO_VERSION); + } +} + +sub debug_checks { + my ($data_ref, $encoded_ref, $decoded_ref) = @_; + if (defined $ENV{DEBUG_SEREAL}) { + note("Original data was: " . Data::Dumper::Dumper($$data_ref)) if defined $data_ref; + note("Encoded data is: " . (defined($$encoded_ref) ? $$encoded_ref : "")) if defined $encoded_ref; + note("Decoded data was: " . Data::Dumper::Dumper($$decoded_ref)) if defined $decoded_ref; + } + if (defined $ENV{DEBUG_DUMP}) { + Dump($$encoded_ref) if defined $encoded_ref; + Dump($$decoded_ref) if defined $decoded_ref; + } + if (defined $ENV{DEBUG_HOBO}) { + hobodecode($$encoded_ref) if defined $encoded_ref; + } + exit() if $ENV{DEBUG_FAIL_FATAL}; +} + +our @BasicTests; +sub setup_tests { + my ($proto_version)=@_; + $PROTO_VERSION= $proto_version if defined $proto_version; + my $ary_ref_for_repeating = [5,6]; + my $scalar_ref_for_repeating = \9; + + my $weak_thing; $weak_thing = [\$weak_thing, 1]; weaken($weak_thing->[0]); + + my $unicode1= "Ba\xDF Ba\xDF"; my $unicode2= "\x{168}nix! \x{263a}"; utf8::upgrade($unicode1); utf8::upgrade($unicode2); + + + @BasicTests = ( + # warning: this hardcodes the POS/NEG headers + [-16, chr(0b0001_0000), "encode -16"], + [-1, chr(0b0001_1111), "encode -1"], + [0, chr(0b0000_0000), "encode 0"], + [1, chr(0b0000_0001), "encode 1"], + [15, chr(0b0000_1111), "encode 15"], + [undef, chr(SRL_HDR_UNDEF), "encode undef"], + ["", short_string(""), "encode empty string"], + ["1", short_string("1"), "encode string '1'"], + ["91a", short_string("91a"), "encode string '91a'"], + ["abc" x 1000, chr(SRL_HDR_BINARY).varint(3000).("abc" x 1000), "long ASCII string"], + [\1, chr(SRL_HDR_REFN).chr(0b0000_0001), "scalar ref to int"], + [[], array(), "empty array ref"], + [[1,2,3], array(chr(0b0000_0001), chr(0b0000_0010), chr(0b0000_0011)), "array ref"], + [1000, chr(SRL_HDR_VARINT).varint(1000), "large int"], + [ [1..1000], + array( + (map chr, (1 .. SRL_POS_MAX_SIZE)), + (map chr(SRL_HDR_VARINT) . varint($_), ((SRL_POS_MAX_SIZE+1) .. 1000)) + ), + "array ref with pos and varints" + ], + + [{}, hash(), "empty hash ref"], + [{foo => "baaaaar"}, hash(short_string("foo"),short_string("baaaaar")), "simple hash ref"], + [ + [qw(foooo foooo foooo)], + sub { + my $opt = shift; + if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) { + my $d = array_head(3); + my $pos = offset($d); + my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; + $d .= short_string("foooo") . chr($tag) . varint($pos) + . chr($tag) . varint($pos); + return $d; + } + else { + return array(short_string("foooo"),short_string("foooo"), short_string("foooo")); + } + }, + "ary ref with repeated string" + ], + [ + [{foooo => "barrr"}, {barrr => "foooo"}], + array(hash(short_string("foooo"), short_string("barrr")), + hash(short_string("barrr"), short_string("foooo"))), + "ary ref of hash refs without repeated strings" + ], + [ + [{foooo => "foooo"}, {foooo2 => "foooo"}], + sub { + my $opt = shift; + if ($opt->{dedupe_strings} || $opt->{aliased_dedupe_strings}) { + my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; + my $d = array_head(2) . hash_head(2) . short_string("foooo"); + my $pos = offset($d); + $d .= short_string("foooo") . hash_head(2) + . short_string("foooo2") + . chr($tag) . varint($pos); + return $d; + } + else { + return array(hash(short_string("foooo"), short_string("foooo")), + hash(short_string("foooo2"), short_string("foooo"))), + } + }, + "ary ref of hash refs with repeated strings" + ], + [$scalar_ref_for_repeating, chr(SRL_HDR_REFN).chr(0b0000_1001), "scalar ref to constant"], + [[$scalar_ref_for_repeating, $scalar_ref_for_repeating], + do { + my $content = array_head(2); + $content .= chr(SRL_HDR_REFN); + my $pos = offset($content); + $content .= chr(0b1000_1001) + .chr(SRL_HDR_REFP) + .varint($pos) + ; + $content + }, "repeated substructure (REFP): scalar ref"], + [[$ary_ref_for_repeating, $ary_ref_for_repeating], + do { + my $content = array_head(2); + my $pos = offset($content) + 1; + $content .= array_fbit(chr(0b0000_0101), chr(0b0000_0110)) + .chr(SRL_HDR_REFP) + .varint($pos) + ; + $content + }, "repeated substructure (REFP): array"], + [[\$ary_ref_for_repeating, [1, $ary_ref_for_repeating]], + do { + my $content = array_head(2) . chr(SRL_HDR_REFN); + my $pos = offset($content) + 1; + $content .= array_fbit( + chr(0b0000_0101), + chr(0b0000_0110) + ) + . array( + chr(0b0000_0001), + chr(SRL_HDR_REFP) . varint($pos) + ) + ; + $content + }, "repeated substructure (REFP): asymmetric"], + [ + $weak_thing, + chr(SRL_HDR_REFN) + . chr(SRL_HDR_ARRAY + FBIT) . varint(2) + . chr(SRL_HDR_PAD) . chr(SRL_HDR_REFN) + . chr(SRL_HDR_REFP) . varint(offseti(1)) + . chr(0b0000_0001) + , + "weak thing copy (requires PAD)" + ], + [ + \$weak_thing, chr(SRL_HDR_REFN) - .chr(SRL_HDR_REGEXP) - .short_string("foo") - .short_string("i"), - "Regexp" + . chr(SRL_HDR_REFN + FBIT) + . chr(SRL_HDR_ARRAY) . varint(2) + .chr(SRL_HDR_WEAKEN) . chr(SRL_HDR_REFP) . varint(offseti(1)) + .chr(0b0000_0001) + , + "weak thing ref" + ], + sub { \@_ } ->( + $weak_thing, + chr(SRL_HDR_REFN + FBIT) + .chr(SRL_HDR_ARRAY).varint(2) + .chr(SRL_HDR_WEAKEN).chr(SRL_HDR_REFP).varint(offseti(0)) + .chr(0b0000_0001) + , + "weak thing (aliased root)" ), - "qr/foo/i" - ], - [ - [{foo => 1}, {foo => 2}], - sub { - my $opt = shift; - if ($opt->{no_shared_hashkeys}) { - return array( - hash( - short_string("foo"), - integer(1), - ), - hash( - short_string("foo"), - integer(2), - ), - ); - } - else { + [ + do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; \@array }, + do { my $content= array_head(2); - return join( - "", - $content, - hash( - short_string("foo"), - integer(1), - ), - hash( - chr(SRL_HDR_COPY) . varint(length($Header)+length($content)+1), - integer(2), - ), + my $pos= offset($content); + $content + . chr(SRL_HDR_REFN + FBIT) + . chr(SRL_HDR_REFP + FBIT) + . varint( $pos ) + . chr(SRL_HDR_ALIAS) + . varint($pos + 1) + }, + "scalar cross" + ], + [ + do { my @array; $array[0]=\$array[1]; $array[1]=\$array[0]; weaken($array[1]); weaken($array[0]); \@array }, + do { + my $content= array_head(2); + my $pos= offset($content); + $content + . chr(SRL_HDR_WEAKEN + FBIT) + . chr(SRL_HDR_REFN) + . chr(SRL_HDR_WEAKEN + FBIT) + . chr(SRL_HDR_REFP) + . varint($pos) + . chr(SRL_HDR_ALIAS) + . varint($pos+2) + }, + "weak scalar cross" + ], + [ + bless([],"foo"), + dump_bless(array(), "foo"), + "bless [], 'foo' (2)" + ], + [ + do { my $qr= bless qr/foo/ix,"bar"; [ $qr, $qr ] }, + do { + my $content= array_head(2); + my $pos= offset($content); + join("", $content, + chr(SRL_HDR_OBJECT), + short_string("bar"), + chr(SRL_HDR_REFN), + chr(SRL_HDR_REGEXP + FBIT), + short_string("foo"), + short_string("ix"), + chr(SRL_HDR_REFP), + varint($pos + 6 ), ) - } - }, - "duplicate hash keys" - ], - [ - { $unicode1 => $unicode2 }, - hash( - chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode1)) . encode_utf8($unicode1), - chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode2)) . encode_utf8($unicode2), - ), - "simple unicode hash key and value" - ], - [ - sub { \@_ }->(!1,!0), - array(chr(SRL_HDR_FALSE),chr(SRL_HDR_TRUE)), - "true/false" - ] -); - + }, + "blessed regexp with reuse" + ], + [ + do { my $o1=bless [], "foo"; my $o2=bless [], "foo"; [ $o1, $o2, $o1, $o2 ] }, + do { + my $content= array_head(4). chr(SRL_HDR_OBJECT); + my $pos= offset($content); + join("",$content, + short_string("foo"), + chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT),varint(0), + chr( SRL_HDR_OBJECT + $use_objectv), + $use_objectv ? () : chr(SRL_HDR_COPY), varint($pos), + chr(SRL_HDR_REFN).chr(SRL_HDR_ARRAY + FBIT), varint(0), + chr(SRL_HDR_REFP),varint($pos + 5), + chr(SRL_HDR_REFP),varint($pos + 10), + ) + }, + "blessed arrays with reuse" + ], + [ + [bless([], "foo"), bless([], "foo")], + do { + my $content = array_head(2) . chr(SRL_HDR_OBJECT); + my $pos = offset($content); + $content .= short_string("foo") + . array() + . dump_bless( array(), \$pos ) + ; + $content + }, + "reused classname empty array" + ], + [ + bless([bless {}, "foo"], "foo"), + do { + my $content = chr(SRL_HDR_OBJECT); + my $pos = offset($content); + $content .= short_string("foo") + . array_head(1) + . dump_bless(hash(), \$pos); + ; + $content + }, + "wrapped objects" + ], + [ + qr/foo/, + dump_bless( + chr(SRL_HDR_REFN) + .chr(SRL_HDR_REGEXP) + .short_string("foo") + .short_string(""), + "Regexp" + ), + "qr/foo/" + ], + [ + qr/(?i-xsm:foo)/, + dump_bless( + chr(SRL_HDR_REFN) + .chr(SRL_HDR_REGEXP) + .short_string("(?i-xsm:foo)") + .short_string(""), + "Regexp" + ), + "qr/(?i-xsm:foo)/" + ], + [ + qr/foo/i, + dump_bless( + chr(SRL_HDR_REFN) + .chr(SRL_HDR_REGEXP) + .short_string("foo") + .short_string("i"), + "Regexp" + ), + "qr/foo/i" + ], + [ + [{foo => 1}, {foo => 2}], + sub { + my $opt = shift; + if ($opt->{no_shared_hashkeys}) { + return array( + hash( + short_string("foo"), + integer(1), + ), + hash( + short_string("foo"), + integer(2), + ), + ); + } + else { + my $content= array_head(2); + return join( + "", + $content, + hash( + short_string("foo"), + integer(1), + ), + hash( + chr(SRL_HDR_COPY) . varint(offset($content)+1), + integer(2), + ), + ) + } + }, + "duplicate hash keys" + ], + [ + { $unicode1 => $unicode2 }, + hash( + chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode1)) . encode_utf8($unicode1), + chr(SRL_HDR_STR_UTF8) . varint(bytes::length($unicode2)) . encode_utf8($unicode2), + ), + "simple unicode hash key and value" + ], + [ + sub { \@_ }->(!1,!0), + array(chr(SRL_HDR_FALSE),chr(SRL_HDR_TRUE)), + "true/false" + ] + ); +} sub get_git_top_dir { @@ -494,6 +547,8 @@ sub have_encoder_and_decoder { return(); }; my $cmp_v = $need_class->VERSION; + $cmp_v =~ s/_//; + $cmp_v = sprintf("%.2f", int($cmp_v*100)/100); if (not defined $cmp_v or not exists $compat_versions{$cmp_v}) { note("Could not load correct version of $need_class for testing " ."(got: $cmp_v, needed any of ".join(", ", keys %compat_versions).")"); @@ -634,19 +689,32 @@ if (eval "use Array::RefElem (av_store hv_store); 1") { sub run_roundtrip_tests { - for my $opt ( - ['plain', { } ], - ['snappy', { snappy => 1 } ], - ['snappy_incr', { snappy_incr => 1 } ], - ['sort_keys', { sort_keys => 1 } ], - ['dedupe_strings', { dedupe_strings => 1 } ], - ) { - run_roundtrip_tests_internal(@$opt); + my ($proto_version) = @_; + my @proto_versions = ($proto_version ? ($proto_version) : qw(2 1)); + + for my $proto_version ($proto_version) { + my $suffix = $proto_version == 1 ? "_v1" : ""; + + for my $opt ( + ['plain', { } ], + ['snappy', { snappy => 1 } ], + ['snappy_incr', { snappy_incr => 1 } ], + ['sort_keys', { sort_keys => 1 } ], + ['dedupe_strings', { dedupe_strings => 1 } ], + ['freeze/thaw', { freeze_callbacks => 1 } ], + ) { + my ($name, $opts) = @$opt; + $name .= $suffix; + $opts->{use_protocol_v1} = 1 if $proto_version == 1; + $PROTO_VERSION= $proto_version; + setup_tests(); + run_roundtrip_tests_internal($name, $opts); + } } } sub run_roundtrip_tests_internal { - my ($ename, $opt) = @_; + my ($ename, $opt, $encode_decode_callbacks) = @_; my $decoder = Sereal::Decoder->new($opt); my $encoder = Sereal::Encoder->new($opt); @@ -657,52 +725,77 @@ sub run_roundtrip_tests_internal { ['object-oriented', sub {$encoder->encode(shift)}, sub {$decoder->decode(shift)}], + ['header-body', + sub {$encoder->encode(shift, 123456789)}, # header data is abitrary to stand out for debugging + sub {$decoder->decode(shift)}], + ['header-only', + sub {$encoder->encode(987654321, shift)}, # body data is abitrary to stand out for debugging + sub {$decoder->decode_only_header(shift)}], ) { my ($mname, $enc, $dec) = @$meth; + next if $mname =~ /header/ and $opt->{use_protocol_v1}; foreach my $rt (@RoundtripTests) { my ($name, $data) = @$rt; - my $encoded = $enc->($data); + my $encoded; + eval {$encoded = $enc->($data); 1} + or do { + my $err = $@ || 'Zombie error'; + diag("Got error while encoding: $err"); + }; ok(defined $encoded, "$name ($ename, $mname, encoded defined)") or do { - if (defined $ENV{DEBUG_SEREAL}) { - note("Data was: " . Data::Dumper::Dumper($data)); - note("Output was: " . (defined($encoded) ? $encoded : "")); - } + debug_checks(\$data, \$encoded, undef); next; }; - my $decoded= $dec->($encoded); + my $decoded; + eval {$decoded = $dec->($encoded); 1} + or do { + my $err = $@ || 'Zombie error'; + diag("Got error while decoding: $err"); + }; ok( defined($decoded) == defined($data), "$name ($ename, $mname, decoded definedness)") - or next; - my $encoded2 = $enc->($decoded); + or do { + debug_checks(\$data, \$encoded, undef); + next; + }; + + # Second roundtrip + my $encoded2; + eval {$encoded2 = $enc->($decoded); 1} + or do { + my $err = $@ || 'Zombie error'; + diag("Got error while encoding the second time: $err"); + }; ok(defined $encoded2, "$name ($ename, $mname, encoded2 defined)") - or next; - my $decoded2 = $dec->($encoded2); + or do { + debug_checks(\$data, \$encoded, \$decoded); + next; + }; + + my $decoded2; + eval {$decoded2 = $dec->($encoded2); 1} + or do { + my $err = $@ || 'Zombie error'; + diag("Got error while encoding the second time: $err"); + }; + ok(defined($decoded2) == defined($data), "$name ($ename, $mname, decoded2 defined)") - or next; + or next; is_deeply($decoded, $data, "$name ($ename, $mname, decoded vs data)") - or do { - if ($ENV{DEBUG_DUMP}) { - Dump($decoded); - Dump($data); - } - }; + or do { + debug_checks(\$data, \$encoded2, \$decoded2); + }; is_deeply($decoded2, $data, "$name ($ename, $mname, decoded2 vs data)") - or do { - if ($ENV{DEBUG_DUMP}) { - Dump($decoded2); - Dump($data); - } - }; + or do { + debug_checks(\$data, \$encoded2, \$decoded2); + }; is_deeply($decoded, $decoded2, "$name ($ename, $mname, decoded vs decoded2)") - or do { - if ($ENV{DEBUG_DUMP}) { - Dump($decoded); - Dump($decoded2); - } - }; - + or do { + debug_checks(\$data, \$encoded2, \$decoded2); + }; + if (0) { # It isnt really safe to test this way right now. The exact output # of two runs of Sereal is not guaranteed to be the same due to the effect of @@ -719,19 +812,7 @@ sub run_roundtrip_tests_internal { $ret = is_string($encoded2, $encoded, "$name ($ename, $mname, encoded2 vs encoded)"); } $ret or do { - if ($ENV{DEBUG_DUMP}) { - Dump($decoded); - Dump($data); - } elsif ($ENV{DEBUG_HOBO}) { - open my $pipe,"| perl -Mblib=../Encoder/blib -Mblib=../Decoder/blib author_tools/hobodecoder.pl -e" - or die "Dead: $!"; - print $pipe $encoded; - close $pipe; - open $pipe,"| perl -Mblib=../Encoder/blib -Mblib=../Decoder/blib author_tools/hobodecoder.pl -e" - or die "Dead: $!"; - print $pipe $encoded2; - close $pipe; - } + debug_checks(\$data, \$encoded, \$decoded); }; } } @@ -757,11 +838,12 @@ sub write_test_files { my $make_data_file_name = sub {File::Spec->catfile($dir, sprintf("test_data_%05u", shift))}; my $make_name_file_name = sub {File::Spec->catfile($dir, sprintf("test_name_%05u", shift))}; + setup_tests(); foreach my $testno (1..@BasicTests) { my $t = $BasicTests[$testno-1]; my $data = ref($t->[1]) eq 'CODE' ? $t->[1]->() : $t->[1]; - _write_file($make_data_file_name->($testno), $Header.$data); + _write_file($make_data_file_name->($testno), Header($PROTO_VERSION).$data); _write_file($make_name_file_name->($testno), $t->[2] . "\n"); }