From 39bb5f447c3c7fb66823cd2bf8a1af238035e81d Mon Sep 17 00:00:00 2001 From: gregor herrmann Date: Sun, 13 Apr 2014 17:34:12 +0200 Subject: [PATCH] Imported Upstream version 2.08 --- Changes | 31 +++++++ Encoder.xs | 196 +++++++++++++++++++++++++++++----------- MANIFEST | 5 +- META.json | 59 ++++++++++++ META.yml | 62 ++++++------- lib/Sereal/Encoder.pm | 42 +++++++-- srl_encoder.c | 47 ++++++++-- srl_encoder.h | 12 +-- t/011_aliased_dedupe.t | 29 ++++++ t/900_reentrancy.t | 44 +++++++++ t/lib/Sereal/TestSet.pm | 29 +++--- typemap | 4 +- 12 files changed, 438 insertions(+), 122 deletions(-) create mode 100644 META.json create mode 100644 t/011_aliased_dedupe.t create mode 100644 t/900_reentrancy.t diff --git a/Changes b/Changes index 3638636..aaaebfd 100644 --- a/Changes +++ b/Changes @@ -4,6 +4,37 @@ Revision history for Perl extension Sereal-Encoder * of the decoder before upgrading to version 2 of the * encoder! +2.08 Thu Apr 10 22:10 2013 + - Production release for previous changes. + +2.070_103 Wed Apr 09 00:33 2013 * DEV RELEASE * + - Synchronization release with Decoder. No changes. + +2.070_102 Sun Apr 06 17:27 2013 * DEV RELEASE * + - Fixes for how we load XS so Sereal.pm works properly + with dev releases. + +2.070_101 Sun Apr 06 17:27 2013 * DEV RELEASE * + - Fix for newer perls. + - Changes to 'fixver.pl' and version numbering so we do + a 3 digit minor version, and a 3 digit dev version, + so once this dev release cycle is done we will be at + v2.071 everywhere. This eliminates a version numbering + inconsistency in Sereal.pm from Encoder.pm and Decoder.pm + +2.07_01 Wed Mar 26 18:10 2014 * DEV RELEASE * + - Fix for aliased_dedupe_strings feature (Borislav Nikolov) + - Add sereal_decode_with_object(), a functional/custom-opcode + implementation of the OO interface, with much less overhead. + In practice this will make a very modest impact on dumping, + but if your applications needs it... + Thanks to Zefram for the custom op implementation. + - Optimize dumping hashes by being more careful how we + check if they have backreferences, and avoid creating + a HvAUX() structure (and thus reallocing the hashes bucket + array) just to find out if they have backreferences. + Reported by Steffen. + 2.06 Sun Mar 0 11:40 2014 (AMS time) - Only minor changes. diff --git a/Encoder.xs b/Encoder.xs index 91c1f1d..6f7d575 100644 --- a/Encoder.xs +++ b/Encoder.xs @@ -17,9 +17,149 @@ #include "ptable.h" +#ifndef GvCV_set +# define GvCV_set(gv, cv) (GvCV(gv) = (cv)) +#endif + +#if defined(cv_set_call_checker) && defined(XopENTRY_set) +# define USE_CUSTOM_OPS 1 +#else +# define USE_CUSTOM_OPS 0 +#endif + +#define pp1_sereal_encode_with_object(has_hdr) THX_pp1_sereal_encode_with_object(aTHX_ has_hdr) +static void +THX_pp1_sereal_encode_with_object(pTHX_ U8 has_hdr) +{ + SV *encoder_ref_sv, *encoder_sv, *body_sv, *header_sv; + srl_encoder_t *enc; + char *stash_name; + SV *ret_sv; + dSP; + + header_sv = has_hdr ? POPs : NULL; + body_sv = POPs; + PUTBACK; + + encoder_ref_sv = TOPs; + + if (!expect_true( + encoder_ref_sv && + SvROK(encoder_ref_sv) && + (encoder_sv = SvRV(encoder_ref_sv)) && + SvOBJECT(encoder_sv) && + (stash_name= HvNAME(SvSTASH(encoder_sv))) && + !strcmp(stash_name, "Sereal::Encoder") + )) + { + croak("handle is not a Sereal::Encoder handle"); + } + + enc= (srl_encoder_t *)SvIV(encoder_sv); + + if (header_sv && !SvOK(header_sv)) + header_sv = NULL; + + /* We always copy the string since we might reuse the string buffer. That + * means we already have to do a malloc and we might as well use the + * opportunity to allocate only as much memory as we really need to hold + * the output. */ + ret_sv= srl_dump_data_structure_mortal_sv(aTHX_ enc, body_sv, header_sv, SRL_ENC_SV_COPY_ALWAYS); + SPAGAIN; + TOPs = ret_sv; +} + +#if USE_CUSTOM_OPS + +static OP * +THX_pp_sereal_encode_with_object(pTHX) +{ + pp1_sereal_encode_with_object(PL_op->op_private); + return NORMAL; +} + +static OP * +THX_ck_entersub_args_sereal_encode_with_object(pTHX_ OP *entersubop, GV *namegv, SV *ckobj) +{ + OP *pushop, *firstargop, *cvop, *lastargop, *argop, *newop; + int arity; + + /* Walk the OP structure under the "entersub" to validate that we + * can use the custom OP implementation. */ + + entersubop = ck_entersub_args_proto(entersubop, namegv, ckobj); + pushop = cUNOPx(entersubop)->op_first; + if (!pushop->op_sibling) + pushop = cUNOPx(pushop)->op_first; + firstargop = pushop->op_sibling; + + for (cvop = firstargop; cvop->op_sibling; cvop = cvop->op_sibling) ; + + lastargop = pushop; + + for (arity = 0, lastargop = pushop, argop = firstargop; argop != cvop; + lastargop = argop, argop = argop->op_sibling) + { + arity++; + } + + if (expect_false(arity < 2 || arity > 3)) + return entersubop; + + /* If we get here, we can replace the entersub with a suitable + * sereal_encode_with_object custom OP. */ + + pushop->op_sibling = cvop; + lastargop->op_sibling = NULL; + op_free(entersubop); + newop = newUNOP(OP_CUSTOM, 0, firstargop); + newop->op_private = arity == 3; + newop->op_ppaddr = THX_pp_sereal_encode_with_object; + + return newop; +} + +#endif /* USE_CUSTOM_OPS */ + +static void +THX_xsfunc_sereal_encode_with_object(pTHX_ CV *cv) +{ + dMARK; + dSP; + SSize_t arity = SP - MARK; + PERL_UNUSED_ARG(cv); + if (arity < 2 || arity > 3) + croak("bad Sereal encoder usage"); + pp1_sereal_encode_with_object(arity == 3); +} + MODULE = Sereal::Encoder PACKAGE = Sereal::Encoder PROTOTYPES: DISABLE +BOOT: +{ +#if USE_CUSTOM_OPS + { + XOP *xop; + Newxz(xop, 1, XOP); + XopENTRY_set(xop, xop_name, "sereal_encode_with_object"); + XopENTRY_set(xop, xop_desc, "sereal_encode_with_object"); + XopENTRY_set(xop, xop_class, OA_UNOP); + Perl_custom_op_register(aTHX_ THX_pp_sereal_encode_with_object, xop); + } +#endif /* USE_CUSTOM_OPS */ + { + GV *gv; + CV *cv = newXSproto_portable("Sereal::Encoder::sereal_encode_with_object", + THX_xsfunc_sereal_encode_with_object, __FILE__, "$$;$"); +#if USE_CUSTOM_OPS + cv_set_call_checker(cv, THX_ck_entersub_args_sereal_encode_with_object, (SV*)cv); +#endif /* USE_CUSTOM_OPS */ + gv = gv_fetchpv("Sereal::Encoder::encode", GV_ADDMULTI, SVt_PVCV); + GvCV_set(gv, cv); + } +} + srl_encoder_t * new(CLASS, opt = NULL) char *CLASS; @@ -35,22 +175,6 @@ DESTROY(enc) CODE: srl_destroy_encoder(aTHX_ enc); -void -encode(enc, src, ...) - srl_encoder_t *enc; - SV *src; - SV *hdr_user_data_src = NULL; - PPCODE: - assert(enc != NULL); - if (items > 2 && SvOK(ST(2))) - hdr_user_data_src = ST(2); - enc = srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src); - assert(enc->buf.pos > enc->buf.start); - /* We always copy the string since we might reuse the string buffer. That means - * we already have to do a malloc and we might as well use the opportunity to - * allocate only as much memory as we really need to hold the output. */ - ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); - XSRETURN(1); void encode_sereal(src, opt = NULL) @@ -61,25 +185,10 @@ encode_sereal(src, opt = NULL) PPCODE: enc = srl_build_encoder_struct(aTHX_ opt); assert(enc != NULL); - enc = srl_dump_data_structure(aTHX_ enc, src, NULL); /* Avoid copy by stealing string buffer if it is not too large. * This makes sense in the functional interface since the string * buffer isn't ever going to be reused. */ - assert(enc->buf.start < enc->buf.pos); - if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) { - /* If not wasting more than 2x memory - FIXME fungible */ - SV *sv = sv_2mortal(newSV_type(SVt_PV)); - ST(0) = sv; - SvPV_set(sv, enc->buf.start); - SvLEN_set(sv, BUF_SIZE(enc->buf)); - SvCUR_set(sv, BUF_POS_OFS(enc->buf)); - SvPOK_on(sv); - - enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */ - } - else { - ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); - } + ST(0) = srl_dump_data_structure_mortal_sv(aTHX_ enc, src, NULL, SRL_ENC_SV_REUSE_MAYBE); XSRETURN(1); void @@ -94,25 +203,10 @@ encode_sereal_with_header_data(src, hdr_user_data_src, opt = NULL) hdr_user_data_src = NULL; enc = srl_build_encoder_struct(aTHX_ opt); assert(enc != NULL); - enc = srl_dump_data_structure(aTHX_ enc, src, hdr_user_data_src); /* Avoid copy by stealing string buffer if it is not too large. * This makes sense in the functional interface since the string * buffer isn't ever going to be reused. */ - assert(enc->buf.start < enc->buf.pos); - if (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) { - /* If not wasting more than 2x memory - FIXME fungible */ - SV *sv = sv_2mortal(newSV_type(SVt_PV)); - ST(0) = sv; - SvPV_set(sv, enc->buf.start); - SvLEN_set(sv, BUF_SIZE(enc->buf)); - SvCUR_set(sv, BUF_POS_OFS(enc->buf)); - SvPOK_on(sv); - - enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */ - } - else { - ST(0) = sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); - } + ST(0) = srl_dump_data_structure_mortal_sv(aTHX_ enc, src, hdr_user_data_src, SRL_ENC_SV_REUSE_MAYBE); XSRETURN(1); MODULE = Sereal::Encoder PACKAGE = Sereal::Encoder::Constants @@ -135,16 +229,16 @@ test() CODE: tbl = PTABLE_new_size(10); for (i = 0; i < (UV)n; ++i) { - PTABLE_store(tbl, (void *)(1000+i), (void *)(1000+i)); + PTABLE_store(tbl, INT2PTR(void *,(1000+i)), INT2PTR(void *, (1000+i))); check[i] = fail; } for (i = 0; i < (UV)n; ++i) { - const UV res = (UV)PTABLE_fetch(tbl, (void *)(1000+i)); + const UV res = (UV)PTABLE_fetch(tbl, INT2PTR(void *, (1000+i))); printf("%sok %u - fetch %u\n", (res == (UV)(1000+i)) ? noop : fail, (unsigned int)(1+i), (unsigned int)(i+1)); } iter = PTABLE_iter_new(tbl); while ( NULL != (ent = PTABLE_iter_next(iter)) ) { - const UV res = ((UV)ent->value) - 1000; + const UV res = (PTR2UV(ent->value)) - 1000; if (res < 20) check[res] = noop; else diff --git a/MANIFEST b/MANIFEST index 52a221d..1d71f27 100644 --- a/MANIFEST +++ b/MANIFEST @@ -34,6 +34,7 @@ t/001_load.t t/002_constants.t t/003_ptable.t t/010_desperate.t +t/011_aliased_dedupe.t t/020_sort_keys.t t/021_sort_keys_option.t t/110_nobless.t @@ -46,8 +47,10 @@ t/400_evil.t t/700_roundtrip.t t/701_roundtrip_v1.t t/800_threads.t +t/900_reentrancy.t t/data/corpus t/lib/Sereal/BulkTest.pm t/lib/Sereal/TestSet.pm typemap -META.yml Module meta-data (added by MakeMaker) +META.yml Module YAML meta-data (added by MakeMaker) +META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json new file mode 100644 index 0000000..6007bd5 --- /dev/null +++ b/META.json @@ -0,0 +1,59 @@ +{ + "abstract" : "Fast, compact, powerful binary serialization", + "author" : [ + "Steffen Mueller , Yves Orton " + ], + "dynamic_config" : 1, + "generated_by" : "ExtUtils::MakeMaker version 6.9, CPAN::Meta::Converter version 2.120351", + "license" : [ + "perl_5" + ], + "meta-spec" : { + "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", + "version" : "2" + }, + "name" : "Sereal-Encoder", + "no_index" : { + "directory" : [ + "t", + "inc" + ] + }, + "prereqs" : { + "build" : { + "requires" : { + "Data::Dumper" : "0", + "ExtUtils::ParseXS" : "2.21", + "File::Find" : "0", + "File::Path" : "0", + "File::Spec" : "0", + "Scalar::Util" : "0", + "Sereal::Decoder" : "2.06", + "Test::LongString" : "0", + "Test::More" : "0.88", + "Test::Warn" : "0" + } + }, + "configure" : { + "requires" : { + "ExtUtils::MakeMaker" : "0" + } + }, + "runtime" : { + "requires" : { + "XSLoader" : "0", + "perl" : "5.008" + } + } + }, + "release_status" : "stable", + "resources" : { + "bugtracker" : { + "web" : "https://github.com/Sereal/Sereal/issues" + }, + "repository" : { + "url" : "git://github.com/Sereal/Sereal.git" + } + }, + "version" : "2.08" +} diff --git a/META.yml b/META.yml index 6a3c4b3..d949264 100644 --- a/META.yml +++ b/META.yml @@ -1,35 +1,35 @@ ---- #YAML:1.0 -name: Sereal-Encoder -version: 2.06 -abstract: Fast, compact, powerful binary serialization +--- +abstract: 'Fast, compact, powerful binary serialization' author: - - Steffen Mueller , Yves Orton -license: perl -distribution_type: module -configure_requires: - ExtUtils::MakeMaker: 0 + - 'Steffen Mueller , Yves Orton ' build_requires: - Data::Dumper: 0 - ExtUtils::ParseXS: 2.21 - File::Find: 0 - File::Path: 0 - File::Spec: 0 - Scalar::Util: 0 - Sereal::Decoder: 2.06 - Test::LongString: 0 - Test::More: 0.88 - Test::Warn: 0 + Data::Dumper: 0 + ExtUtils::ParseXS: 2.21 + File::Find: 0 + File::Path: 0 + File::Spec: 0 + Scalar::Util: 0 + Sereal::Decoder: 2.06 + Test::LongString: 0 + Test::More: 0.88 + Test::Warn: 0 +configure_requires: + ExtUtils::MakeMaker: 0 +dynamic_config: 1 +generated_by: 'ExtUtils::MakeMaker version 6.9, CPAN::Meta::Converter version 2.120351' +license: perl +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.4.html + version: 1.4 +name: Sereal-Encoder +no_index: + directory: + - t + - inc requires: - perl: 5.008 - XSLoader: 0 + XSLoader: 0 + perl: 5.008 resources: - bugtracker: https://github.com/Sereal/Sereal/issues - repository: git://github.com/Sereal/Sereal.git -no_index: - directory: - - t - - inc -generated_by: ExtUtils::MakeMaker version 6.57_05 -meta-spec: - url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + bugtracker: https://github.com/Sereal/Sereal/issues + repository: git://github.com/Sereal/Sereal.git +version: 2.08 diff --git a/lib/Sereal/Encoder.pm b/lib/Sereal/Encoder.pm index 4849100..ad27e3a 100644 --- a/lib/Sereal/Encoder.pm +++ b/lib/Sereal/Encoder.pm @@ -5,22 +5,23 @@ use warnings; use Carp qw/croak/; use XSLoader; -our $VERSION = '2.06'; # Don't forget to update the TestCompat set for testing against installed decoders! +our $VERSION = '2.08'; # Don't forget to update the TestCompat set for testing against installed decoders! +our $XS_VERSION = $VERSION; $VERSION= eval $VERSION; # not for public consumption, just for testing. (my $num_version = $VERSION) =~ s/_//; -my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 200 .. int($num_version * 100) ) ]; # compat with 2.00 to ... +my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 207 .. int($num_version * 100) ) ]; # compat with 2.07 to ... sub _test_compat {return(@$TestCompat, $VERSION)} use Exporter 'import'; -our @EXPORT_OK = qw(encode_sereal encode_sereal_with_header_data); +our @EXPORT_OK = qw(encode_sereal encode_sereal_with_header_data sereal_encode_with_object); our %EXPORT_TAGS = (all => \@EXPORT_OK); # export by default if run from command line our @EXPORT = ((caller())[1] eq '-e' ? @EXPORT_OK : ()); sub CLONE_SKIP {1} -XSLoader::load('Sereal::Encoder', $VERSION); +XSLoader::load('Sereal::Encoder', $XS_VERSION); 1; @@ -34,11 +35,15 @@ Sereal::Encoder - Fast, compact, powerful binary serialization =head1 SYNOPSIS - use Sereal::Encoder qw(encode_sereal); + use Sereal::Encoder qw(encode_sereal sereal_encode_with_object); my $encoder = Sereal::Encoder->new({...options...}); my $out = $encoder->encode($structure); - # alternatively: + + # alternatively the functional interface: + $out = sereal_encode_with_object($encoder, $structure); + + # much slower functional interface with no persistent objects: $out = encode_sereal($structure, {... options ...}); =head1 DESCRIPTION @@ -47,6 +52,8 @@ This library implements an efficient, compact-output, and feature-rich serializer using a binary protocol called I. Its sister module L implements a decoder for this format. The two are released separately to allow for independent and safer upgrading. +If you care greatly about performance, consider reading the L +documentation after finishing this document. The Sereal protocol version emitted by this encoder implementation is currently protocol version 2 by default. @@ -273,20 +280,33 @@ L. =head1 EXPORTABLE FUNCTIONS +=head2 sereal_encode_with_object + +The functional interface that is equivalent to using C. Takes an +encoder object reference as first argument, followed by a data structure +to serialize. + +This functional interface is marginally faster than the OO interface +since it avoids method resolution overhead and, on sufficiently modern +Perl versions, can usually avoid subroutine call overhead. + =head2 encode_sereal The functional interface that is equivalent to using C and C. Expects a data structure to serialize as first argument, optionally followed by a hash reference of options (see documentation for C). -The functional interface is quite a bit slower than the OO interface since +This functional interface is significantly slower than the OO interface since it cannot reuse the encoder object. =head1 PERFORMANCE -If you care about performance at all, then use the object-oriented interface -instead of the functional interface. It's a significant difference in performance -if you are serializing small data structures. +See L for detailed considerations on performance +tuning. Let it just be said that: + +B or the +OO interface instead of L. It's a significant difference +in performance if you are serializing small data structures.> The exact performance in time and space depends heavily on the data structure to be serialized. Often there is a trade-off between space and time. If in doubt, @@ -493,6 +513,8 @@ Daniel Dragan Ebulkdd@cpan.orgE (Windows support and bugfixes) Zefram +Borislav Nikolov + Some inspiration and code was taken from Marc Lehmann's excellent L module due to obvious overlap in problem domain. Thank you! diff --git a/srl_encoder.c b/srl_encoder.c index 2069ecd..c9a8568 100644 --- a/srl_encoder.c +++ b/srl_encoder.c @@ -100,6 +100,14 @@ SRL_STATIC_INLINE PTABLE_t *srl_init_freezeobj_svhash(srl_encoder_t *enc); SRL_STATIC_INLINE PTABLE_t *srl_init_weak_hash(srl_encoder_t *enc); SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc); +/* Note: This returns an encoder struct pointer because it will + * clone the current encoder struct if it's dirty. That in + * turn means in order to access the output buffer, you need + * to inspect the returned encoder struct. If necessary, it + * will be cleaned up automatically by Perl, so don't bother + * freeing it. */ +SRL_STATIC_INLINE srl_encoder_t *srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src); + #define SRL_GET_STR_DEDUPER_HV(enc) ( (enc)->string_deduper_hv == NULL \ ? srl_init_string_deduper_hv(aTHX_ enc) \ : (enc)->string_deduper_hv ) @@ -692,7 +700,7 @@ srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement) srl_buf_cat_char(enc, expect_false(replacement) ? SRL_HDR_OBJECT_FREEZE : SRL_HDR_OBJECT); /* remember current offset before advancing it */ - PTABLE_store(string_seenhash, (void *)stash, (void *)BODY_POS_OFS(enc->buf)); + PTABLE_store(string_seenhash, (void *)stash, INT2PTR(void *, BODY_POS_OFS(enc->buf))); /* HvNAMEUTF8 not in older perls and it would be 0 for those anyway */ #if PERL_VERSION >= 16 @@ -771,7 +779,7 @@ srl_reset_snappy_header_flag(srl_encoder_t *enc) (*flags_and_version_byte & SRL_PROTOCOL_VERSION_MASK); } -srl_encoder_t * +SRL_STATIC_INLINE srl_encoder_t * srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src) { enc = srl_prepare_encoder(aTHX_ enc); @@ -862,6 +870,29 @@ srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src) return enc; } +SV * +srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src, const U32 flags) +{ + assert(enc); + enc = srl_dump_data_structure(aTHX_ enc, src, user_header_src); + assert(enc->buf.start && enc->buf.pos && enc->buf.pos > enc->buf.start); + + if ( flags && /* for now simpler and equivalent to: flags == SRL_ENC_SV_REUSE_MAYBE */ + (BUF_POS_OFS(enc->buf) > 20 && BUF_SPACE(enc->buf) < BUF_POS_OFS(enc->buf) ) + ){ + /* If not wasting more than 2x memory - FIXME fungible */ + SV *sv = sv_2mortal(newSV_type(SVt_PV)); + SvPV_set(sv, enc->buf.start); + SvLEN_set(sv, BUF_SIZE(enc->buf)); + SvCUR_set(sv, BUF_POS_OFS(enc->buf)); + SvPOK_on(sv); + enc->buf.start = enc->buf.pos = NULL; /* no need to free these guys now */ + return sv; + } + + return sv_2mortal(newSVpvn(enc->buf.start, (STRLEN)BUF_POS_OFS(enc->buf))); +} + SRL_STATIC_INLINE void srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc) { @@ -1189,7 +1220,7 @@ srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys) else { /* remember current offset before advancing it */ const ptrdiff_t newoffset = BODY_POS_OFS(enc->buf); - PTABLE_store(string_seenhash, (void *)str, (void *)newoffset); + PTABLE_store(string_seenhash, (void *)str, INT2PTR(void *, newoffset)); } } len= HeKLEN(src); @@ -1221,6 +1252,8 @@ srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src) SV *ofs_sv= HeVAL(dupe_offset_he); if (SvIOK(ofs_sv)) { /* emit copy or alias */ + if (out_tag == SRL_HDR_ALIAS) + SRL_SET_FBIT(*(enc->buf.body_pos + SvUV(ofs_sv))); srl_buf_cat_varint(aTHX_ enc, out_tag, SvIV(ofs_sv)); return; } else if (SvUOK(ofs_sv)) { @@ -1293,7 +1326,7 @@ redo_dump: mg = mg_find(src, PERL_MAGIC_backref); } #ifdef HAS_HV_BACKREFS - if (svt == SVt_PVHV) { + if (expect_false( svt == SVt_PVHV && SvOOK(src) )) { backrefs= *Perl_hv_backreferences_p(aTHX_ (HV *)src); if (DEBUGHACK) warn("backreferences %p", src); } @@ -1305,12 +1338,12 @@ redo_dump: /* not seen it before */ if (DEBUGHACK) warn("scalar %p - is weak referent, storing %lu", src, weakref_ofs); /* if weakref_ofs is false we got here some way that holds a refcount on this item */ - PTABLE_store(weak_seenhash, src, (void *)weakref_ofs); + PTABLE_store(weak_seenhash, src, INT2PTR(void *, weakref_ofs)); } else { if (DEBUGHACK) warn("scalar %p - is weak referent, seen before value:%lu weakref_ofs:%lu", src, (UV)pe->value, (UV)weakref_ofs); if (pe->value) - pe->value= (void *)weakref_ofs; + pe->value= INT2PTR(void *, weakref_ofs); } refcount++; weakref_ofs= 0; @@ -1348,7 +1381,7 @@ redo_dump: return; } if (DEBUGHACK) warn("storing %p as %lu", src, (long unsigned int)BODY_POS_OFS(enc->buf)); - PTABLE_store(ref_seenhash, src, (void *)BODY_POS_OFS(enc->buf)); + PTABLE_store(ref_seenhash, src, INT2PTR(void *, BODY_POS_OFS(enc->buf))); } } if (expect_false( weakref_ofs != 0 )) { diff --git a/srl_encoder.h b/srl_encoder.h index 6f0bd01..7c450e8 100644 --- a/srl_encoder.h +++ b/srl_encoder.h @@ -11,6 +11,7 @@ # define INITIALIZATION_SIZE 64 #endif +#include "srl_inline.h" #include "srl_buffer_types.h" typedef struct PTABLE * ptable_ptr; @@ -49,13 +50,7 @@ void srl_destroy_encoder(pTHX_ srl_encoder_t *enc); /* Write Sereal packet header to output buffer */ void srl_write_header(pTHX_ srl_encoder_t *enc, SV *user_header_src); /* Start dumping a top-level SV */ -/* Note: This returns an encoder struct pointer because it will - * clone the current encoder struct if it's dirty. That in - * turn means in order to access the output buffer, you need - * to inspect the returned encoder struct. If necessary, it - * will be cleaned up automatically by Perl, so don't bother - * freeing it. */ -srl_encoder_t *srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src); +SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *user_header_src, const U32 flags); /* define option bits in srl_encoder_t's flags member */ @@ -123,4 +118,7 @@ srl_encoder_t *srl_dump_data_structure(pTHX_ srl_encoder_t *enc, SV *src, SV *us #define SRL_ENC_SET_OPER_FLAG(enc, flag_num) STMT_START {(enc)->operational_flags |= (flag_num);}STMT_END #define SRL_ENC_RESET_OPER_FLAG(enc, flag_num) STMT_START {(enc)->operational_flags &= ~(flag_num);}STMT_END +#define SRL_ENC_SV_COPY_ALWAYS 0x00000000UL +#define SRL_ENC_SV_REUSE_MAYBE 0x00000001UL + #endif diff --git a/t/011_aliased_dedupe.t b/t/011_aliased_dedupe.t new file mode 100644 index 0000000..0d6febc --- /dev/null +++ b/t/011_aliased_dedupe.t @@ -0,0 +1,29 @@ +#!perl +use strict; +use warnings; +# most be loaded before Sereal::TestSet +use Sereal::Encoder qw(encode_sereal); +use Sereal::Encoder::Constants qw(:all); +use Sereal::Decoder; + +use File::Spec; +use Scalar::Util qw(refaddr reftype); +use lib File::Spec->catdir(qw(t lib)); +BEGIN { + lib->import('lib') + if !-d 't'; +} + +use Sereal::TestSet qw(:all); +use Data::Dumper; # must be loaded AFTER the test set (bug in perl) +use Test::More; +my $dup = "bad" x 100; +my $dup2 = "beef" x 100; +my $enc = Sereal::Encoder->new({aliased_dedupe_strings => 1}); +my $encoded = $enc->encode([$dup,"a",$dup2,"b",$dup,"c",$dup2,"d"]); +my $decoded = Sereal::Decoder::decode_sereal($encoded); +is($decoded->[0],$dup); +is($decoded->[2],$dup2); +is(refaddr(\$decoded->[0]),refaddr(\$decoded->[4]),"expected same reference for decoded->[0] and decoded->[2]"); +is(refaddr(\$decoded->[2]),refaddr(\$decoded->[6]),"expected same reference for decoded->[2] and decoded->[6]"); +done_testing(); diff --git a/t/900_reentrancy.t b/t/900_reentrancy.t new file mode 100644 index 0000000..36c5550 --- /dev/null +++ b/t/900_reentrancy.t @@ -0,0 +1,44 @@ +#!perl +use strict; +use warnings; +use Test::More; + +# Encoder reentrancy test courtesy of Zefram + +use Sereal::Encoder; +use Sereal::Decoder; + +my $enc = Sereal::Encoder->new({freeze_callbacks=>1}); + +package Foo; +sub FREEZE { $enc->encode($_[0]->{a}) } +sub THAW { + my $class = shift; + return bless( + {a => Sereal::Decoder->new->decode($_[1])} + => $class + ); +} + +package main; + +my $data = bless({a=>42},"Foo"); +my $a = $enc->encode($data); +my $output; +my $err; +eval { + $output = Sereal::Decoder->new->decode($a); + 1 +} +or do { + $err = $@ || "Zombie Error"; +}; + +ok(!$err, "Decoding did not barf") + or diag("Decoding barfed with '$err'"); + +is_deeply($output, + $data, + "Decoded result is correct"); + +done_testing(); diff --git a/t/lib/Sereal/TestSet.pm b/t/lib/Sereal/TestSet.pm index ef2e90a..354f1e4 100644 --- a/t/lib/Sereal/TestSet.pm +++ b/t/lib/Sereal/TestSet.pm @@ -110,7 +110,9 @@ sub dump_bless { sub short_string { die if length($_[0]) > SRL_MASK_SHORT_BINARY_LEN; - return chr(SRL_HDR_SHORT_BINARY_LOW + length($_[0])) . $_[0]; + my $tag = SRL_HDR_SHORT_BINARY_LOW + length($_[0]); + $tag |= SRL_HDR_TRACK_FLAG if $_[1]; + return pack("c a*",$tag,$_[0]); } sub integer { @@ -234,7 +236,7 @@ sub setup_tests { my $d = array_head(3); my $pos = offset($d); my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; - $d .= short_string("foooo") . chr($tag) . varint($pos) + $d .= short_string("foooo",$opt->{aliased_dedupe_strings} ? 1 : 0) . chr($tag) . varint($pos) . chr($tag) . varint($pos); return $d; } @@ -258,7 +260,7 @@ sub setup_tests { my $tag = $opt->{aliased_dedupe_strings} ? SRL_HDR_ALIAS : SRL_HDR_COPY; my $d = array_head(2) . hash_head(2) . short_string("foooo"); my $pos = offset($d); - $d .= short_string("foooo") . hash_head(2) + $d .= short_string("foooo",$opt->{aliased_dedupe_strings} ? 1 : 0) . hash_head(2) . short_string("foooo2") . chr($tag) . varint($pos); return $d; @@ -719,18 +721,21 @@ sub run_roundtrip_tests_internal { my $encoder = Sereal::Encoder->new($opt); foreach my $meth ( - ['functional', - sub {Sereal::Encoder::encode_sereal(shift, $opt)}, - sub {Sereal::Decoder::decode_sereal(shift, $opt)}], + ['functional simple', + sub {Sereal::Encoder::encode_sereal($_[0], $opt)}, + sub {Sereal::Decoder::decode_sereal($_[0], $opt)}], ['object-oriented', - sub {$encoder->encode(shift)}, - sub {$decoder->decode(shift)}], + sub {$encoder->encode($_[0])}, + sub {$decoder->decode($_[0])}], + ['functional with object', + sub {Sereal::Encoder::sereal_encode_with_object($encoder, $_[0])}, + sub {Sereal::Decoder::sereal_decode_with_object($decoder, $_[0])}], ['header-body', - sub {$encoder->encode(shift, 123456789)}, # header data is abitrary to stand out for debugging - sub {$decoder->decode(shift)}], + sub {$encoder->encode($_[0], 123456789)}, # header data is abitrary to stand out for debugging + sub {$decoder->decode($_[0])}], ['header-only', - sub {$encoder->encode(987654321, shift)}, # body data is abitrary to stand out for debugging - sub {$decoder->decode_only_header(shift)}], + sub {$encoder->encode(987654321, $_[0])}, # body data is abitrary to stand out for debugging + sub {$decoder->decode_only_header($_[0])}], ) { my ($mname, $enc, $dec) = @$meth; diff --git a/typemap b/typemap index 99d9b48..8aaa7b9 100644 --- a/typemap +++ b/typemap @@ -1,5 +1,3 @@ -# from "perlobject.map" Dean Roehrich, version 19960302 - # O_OBJECT -> link an opaque C or C++ object to a blessed Perl object. srl_encoder_t * O_OBJECT srl_decoder_t * O_OBJECT @@ -17,7 +15,7 @@ INPUT O_OBJECT if( sv_isobject($arg) && (SvTYPE(SvRV($arg)) == SVt_PVMG) ) - $var = ($type)SvIV((SV*)SvRV( $arg )); + $var = INT2PTR($type, SvIV((SV*)SvRV( $arg ))); else{ warn( \"${Package}::$func_name() -- $var is not a blessed SV reference\" ); XSRETURN_UNDEF; -- 2.30.2