Revision history for Perl extension Sereal-Encoder
+0.35 Mon Apr 1 11:50 2013 (AMS time)
+ - Add new no_bless_objects option from Simon Bertrang.
+
+0.34 Sat Mar 23 18:59:18 2013 (AMS time)
+ - Fixup Manifest
+
+0.33 Sun Feb 17 17:26 2013 (AMS time)
+ - Fix problem with hv_backrefs (Issue #27)
+
+0.32 Sun Feb 17 15:06 2013 (AMS time)
+ - Add "dedupe_strings" option, which will make
+ the encoder do extra work to dedupe string values
+ in the serialized output.
+
0.31 Sun Feb 17 15:06 2013 (AMS time)
- Daniel Dragan <bulk88@hotmail.com> spent a bunch of time
digging into the weird problems we were having with Snappy
ppport.h
ptable.h
snappy/csnappy.h
+snappy/csnappy_compat.h
snappy/csnappy_compress.c
snappy/csnappy_decompress.c
snappy/csnappy_internal.h
t/003_ptable.t
t/010_desperate.t
t/020_sort_keys.t
+t/021_sort_keys_option.t
t/100_roundtrip.t
+t/110_nobless.t
t/160_recursion.t
t/200_bulk.t
t/300_fail.t
--- #YAML:1.0
name: Sereal-Encoder
-version: 0.31
+version: 0.35
abstract: Fast, compact, powerful binary serialization
author:
- Steffen Mueller <smueller@cpan.org>, Yves Orton <yves@cpan.org>
perl: 5.008
XSLoader: 0
resources:
+ bugtracker: https://github.com/Sereal/Sereal/issues
repository: git://github.com/Sereal/Sereal.git
no_index:
directory:
MIN_PERL_VERSION => '5.008',
META_MERGE => {
resources => {
- repository => 'git://github.com/Sereal/Sereal.git'
+ repository => 'git://github.com/Sereal/Sereal.git',
+ bugtracker => 'https://github.com/Sereal/Sereal/issues',
},
},
BUILD_REQUIRES => {
use Carp qw/croak/;
use XSLoader;
-our $VERSION = '0.31'; # Don't forget to update the TestCompat set for testing against installed decoders!
+our $VERSION = '0.35'; # Don't forget to update the TestCompat set for testing against installed decoders!
# not for public consumption, just for testing.
-my $TestCompat = [map sprintf("%.2f", $_/100), reverse(23..31)]; # compat with 0.23 to ...
+my $TestCompat = [ map sprintf("%.2f", $_/100), reverse( 23 .. int($VERSION * 100) ) ]; # compat with 0.23 to ...
sub _test_compat {return(@$TestCompat, $VERSION)}
use Exporter 'import';
a destructor on a remote system or generally executing code based on
data.
+See also C<no_bless_objects> to skip the blessing of objects.
+When both flags are set, C<croak_on_bless> takes precedence over
+C<no_bless_objects>.
+
+=head3 no_bless_objects
+
+If this option is set, then the encoder will serialize blessed references
+without the bless information and provide plain data structures instead.
+
+See also the C<croak_on_bless> option above for more details.
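+
+For illustration, a minimal sketch of the effect (C<MyFoo> is just a
+made-up class name):
+
+  use Sereal::Encoder;
+  my $enc  = Sereal::Encoder->new({ no_bless_objects => 1 });
+  my $blob = $enc->encode( bless( { x => 1 }, 'MyFoo' ) );
+  # decoding $blob yields a plain, unblessed hashref { x => 1 }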
+
=head3 undef_unknown
If set, unknown/unsupported data structures will be encoded as C<undef>
See L</NON-CANONICAL> for why you might want to use this, and for the
various caveats involved.
+=head3 dedupe_strings
+
+If true, Sereal will use a hash to dedupe strings during serialization. This
+has a performance and memory penalty, so it defaults to off, but data
+structures with many duplicated string values will see a significant
+reduction in the size of the encoded form. Currently only strings longer
+than 3 characters are deduped; this may change in the future.
+
+Note that Sereal performs certain types of deduping automatically even
+without this option. In particular, class names and hash keys are deduped
+regardless of this option. Only enable this if you have good reason to
+believe that there are many duplicated strings as values in your data
+structure.
+
+Use of this option does not require an upgraded decoder. The deduping
+is performed in such a way that older decoders should handle it just fine.
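+
+A rough usage sketch (the sample data is made up; the actual saving depends
+on your data):
+
+  use Sereal::Encoder;
+  my $enc   = Sereal::Encoder->new({ dedupe_strings => 1 });
+  my $value = "some string longer than 3 characters";
+  my $blob  = $enc->encode( [ ($value) x 100 ] );
+  # repeated occurrences of $value are emitted as COPY tags that refer
+  # back to the first occurrence, shrinking the encoded output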
+
=head1 INSTANCE METHODS
=head2 encode
#ifndef PTABLE_H_
#define PTABLE_H_
-#include "ppport.h"
+#include <assert.h>
#include <limits.h>
+#include "ppport.h"
#if PTRSIZE == 8
/*
* This is one of Thomas Wang's hash functions for 64-bit integers from:
* http://www.concentric.net/~Ttwang/tech/inthash.htm
*/
- STATIC U32 ptr_hash(PTRV u) {
+ SRL_STATIC_INLINE U32 ptr_hash(PTRV u) {
u = (~u) + (u << 18);
u = u ^ (u >> 31);
u = u * 21;
* This is one of Bob Jenkins' hash functions for 32-bit integers
* from: http://burtleburtle.net/bob/hash/integer.html
*/
- STATIC U32 ptr_hash(PTRV u) {
+ SRL_STATIC_INLINE U32 ptr_hash(PTRV u) {
u = (u + 0x7ed55d16) + (u << 12);
u = (u ^ 0xc761c23c) ^ (u >> 19);
u = (u + 0x165667b1) + (u << 5);
--- /dev/null
+#ifndef CSNAPPY_COMPAT_H
+#define CSNAPPY_COMPAT_H
+
+/* This file was added to Sereal to attempt some MSVC compatibility,
+ * but is at best a band-aid. And done without a lot of experience
+ * in whatever subset of C99 MSVC supports.
+ */
+
+#ifndef INLINE
+# if defined(_MSC_VER)
+# define INLINE __inline
+# else
+# define INLINE inline
+# endif
+#endif
+
+#endif
File modified for the Linux Kernel by
Zeev Tarantov <zeev.tarantov@gmail.com>
+
+File modified for Sereal by
+Steffen Mueller <smueller@cpan.org>
*/
#include "csnappy_internal.h"
#include "csnappy.h"
-static inline char*
+static INLINE char*
encode_varint32(char *sptr, uint32_t v)
{
uint8_t* ptr = (uint8_t *)sptr;
* input. Of course, it doesn't hurt if the hash function is reasonably fast
* either, as it gets called a lot.
*/
-static inline uint32_t HashBytes(uint32_t bytes, int shift)
+static INLINE uint32_t HashBytes(uint32_t bytes, int shift)
{
uint32_t kMul = 0x1e35a7bd;
return (bytes * kMul) >> shift;
}
-static inline uint32_t Hash(const char *p, int shift)
+static INLINE uint32_t Hash(const char *p, int shift)
{
return HashBytes(UNALIGNED_LOAD32(p), shift);
}
* x86_64 is little endian.
*/
#if defined(__x86_64__)
-static inline int
+static INLINE int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
uint64_t x;
return matched;
}
#else /* !defined(__x86_64__) */
-static inline int
+static INLINE int
FindMatchLength(const char *s1, const char *s2, const char *s2_limit)
{
/* Implementation based on the x86-64 version, above. */
#endif /* !defined(__x86_64__) */
-static inline char*
+static INLINE char*
EmitLiteral(char *op, const char *literal, int len, int allow_fast_path)
{
int n = len - 1; /* Zero-length literals are disallowed */
return op + len;
}
-static inline char*
+static INLINE char*
EmitCopyLessThan64(char *op, int offset, int len)
{
DCHECK_LE(len, 64);
return op;
}
-static inline char*
+static INLINE char*
EmitCopy(char *op, int offset, int len)
{
/* Emit 64 byte copies but make sure to keep at least four bytes
are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
We have different versions for 64- and 32-bit; ideally we would avoid the
-two functions and just inline the UNALIGNED_LOAD64 call into
+two functions and just INLINE the UNALIGNED_LOAD64 call into
GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
enough to avoid loading the value multiple times then. For 64-bit, the load
is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
typedef uint64_t EightBytesReference;
-static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+static INLINE EightBytesReference GetEightBytesAt(const char* ptr) {
return UNALIGNED_LOAD64(ptr);
}
-static inline uint32_t GetUint32AtOffset(uint64_t v, int offset) {
+static INLINE uint32_t GetUint32AtOffset(uint64_t v, int offset) {
DCHECK_GE(offset, 0);
DCHECK_LE(offset, 4);
#ifdef __LITTLE_ENDIAN
typedef const char* EightBytesReference;
-static inline EightBytesReference GetEightBytesAt(const char* ptr) {
+static INLINE EightBytesReference GetEightBytesAt(const char* ptr) {
return ptr;
}
-static inline uint32_t GetUint32AtOffset(const char* v, int offset) {
+static INLINE uint32_t GetUint32AtOffset(const char* v, int offset) {
DCHECK_GE(offset, 0);
DCHECK_LE(offset, 4);
return UNALIGNED_LOAD32(v + offset);
File modified for the Linux Kernel by
Zeev Tarantov <zeev.tarantov@gmail.com>
+
+File modified for Sereal by
+Steffen Mueller <smueller@cpan.org>
*/
#include "csnappy_internal.h"
* Note that this does not match the semantics of either memcpy()
* or memmove().
*/
-static inline void IncrementalCopy(const char *src, char *op, int len)
+static INLINE void IncrementalCopy(const char *src, char *op, int len)
{
DCHECK_GT(len, 0);
do {
* position 1. Thus, ten excess bytes.
*/
static const int kMaxIncrementCopyOverflow = 10;
-static inline void IncrementalCopyFastPath(const char *src, char *op, int len)
+static INLINE void IncrementalCopyFastPath(const char *src, char *op, int len)
{
while (op - src < 8) {
UnalignedCopy64(src, op);
char *op_limit;
};
-static inline int
+static INLINE int
SAW__AppendFastPath(struct SnappyArrayWriter *this,
const char *ip, uint32_t len)
{
return CSNAPPY_E_OK;
}
-static inline int
+static INLINE int
SAW__Append(struct SnappyArrayWriter *this,
const char *ip, uint32_t len)
{
return CSNAPPY_E_OK;
}
-static inline int
+static INLINE int
SAW__AppendFromSelf(struct SnappyArrayWriter *this,
uint32_t offset, uint32_t len)
{
File modified for the Linux Kernel by
Zeev Tarantov <zeev.tarantov@gmail.com>
+
+File modified for Sereal by
+Steffen Mueller <smueller@cpan.org>
*/
#ifndef CSNAPPY_INTERNAL_H_
#define CSNAPPY_INTERNAL_H_
+#include "csnappy_compat.h"
+
#ifndef __KERNEL__
#include "csnappy_internal_userspace.h"
#include <string.h>
defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || defined(__ARMV6__) || \
defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
-static inline void UnalignedCopy64(const void *src, void *dst) {
+static INLINE void UnalignedCopy64(const void *src, void *dst) {
#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || ARCH_ARM_HAVE_UNALIGNED
if ((sizeof(void *) == 8) || (sizeof(long) == 8)) {
UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
#if defined(__arm__)
#if ARCH_ARM_HAVE_UNALIGNED
- static inline uint32_t get_unaligned_le(const void *p, uint32_t n)
+ static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n)
{
uint32_t wordmask = (1U << (8 * n)) - 1;
return get_unaligned_le32(p) & wordmask;
#define get_unaligned_le get_unaligned_le_armv5
#endif
#else
- static inline uint32_t get_unaligned_le(const void *p, uint32_t n)
+ static INLINE uint32_t get_unaligned_le(const void *p, uint32_t n)
{
/* Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits */
static const uint32_t wordmask[] = {
File modified by
Zeev Tarantov <zeev.tarantov@gmail.com>
+
+File modified for Sereal by
+Steffen Mueller <smueller@cpan.org>
*/
#ifndef CSNAPPY_INTERNAL_USERSPACE_H_
#define DCHECK(cond)
#endif
+#include "csnappy_compat.h"
+
/*
Uses code from http://code.google.com/p/exfat/source/browse/trunk/libexfat/byteorder.h
with 3-clause BSD license instead of GPL, with permission from:
struct una_u64 { uint64_t x; };
#pragma pack()
-static inline uint64_t UNALIGNED_LOAD64(const void *p)
+static INLINE uint64_t UNALIGNED_LOAD64(const void *p)
{
const struct una_u64 *ptr = (const struct una_u64 *)p;
return ptr->x;
}
-static inline void UNALIGNED_STORE64(void *p, uint64_t v)
+static INLINE void UNALIGNED_STORE64(void *p, uint64_t v)
{
struct una_u64 *ptr = (struct una_u64 *)p;
ptr->x = v;
struct una_u64 { uint64_t x; };
#pragma pack()
-static inline uint16_t UNALIGNED_LOAD16(const void *p)
+static INLINE uint16_t UNALIGNED_LOAD16(const void *p)
{
const struct una_u16 *ptr = (const struct una_u16 *)p;
return ptr->x;
}
-static inline uint32_t UNALIGNED_LOAD32(const void *p)
+static INLINE uint32_t UNALIGNED_LOAD32(const void *p)
{
const struct una_u32 *ptr = (const struct una_u32 *)p;
return ptr->x;
}
-static inline uint64_t UNALIGNED_LOAD64(const void *p)
+static INLINE uint64_t UNALIGNED_LOAD64(const void *p)
{
const struct una_u64 *ptr = (const struct una_u64 *)p;
return ptr->x;
}
-static inline void UNALIGNED_STORE16(void *p, uint16_t v)
+static INLINE void UNALIGNED_STORE16(void *p, uint16_t v)
{
struct una_u16 *ptr = (struct una_u16 *)p;
ptr->x = v;
}
-static inline void UNALIGNED_STORE32(void *p, uint32_t v)
+static INLINE void UNALIGNED_STORE32(void *p, uint32_t v)
{
struct una_u32 *ptr = (struct una_u32 *)p;
ptr->x = v;
}
-static inline void UNALIGNED_STORE64(void *p, uint64_t v)
+static INLINE void UNALIGNED_STORE64(void *p, uint64_t v)
{
struct una_u64 *ptr = (struct una_u64 *)p;
ptr->x = v;
#define get_unaligned_le32(p) UNALIGNED_LOAD32(p)
#define put_unaligned_le16(v, p) UNALIGNED_STORE16(p, v)
#elif __BYTE_ORDER == __BIG_ENDIAN
-static inline uint32_t get_unaligned_le32(const void *p)
+static INLINE uint32_t get_unaligned_le32(const void *p)
{
return bswap_32(UNALIGNED_LOAD32(p));
}
-static inline void put_unaligned_le16(uint16_t val, void *p)
+static INLINE void put_unaligned_le16(uint16_t val, void *p)
{
UNALIGNED_STORE16(p, bswap_16(val));
}
#else
-static inline uint32_t get_unaligned_le32(const void *p)
+static INLINE uint32_t get_unaligned_le32(const void *p)
{
const uint8_t *b = (const uint8_t *)p;
return b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);
}
-static inline void put_unaligned_le16(uint16_t val, void *p)
+static INLINE void put_unaligned_le16(uint16_t val, void *p)
{
uint8_t *b = (uint8_t *)p;
b[0] = val & 255;
#if defined(HAVE_BUILTIN_CTZ)
-static inline int FindLSBSetNonZero(uint32_t n)
+static INLINE int FindLSBSetNonZero(uint32_t n)
{
return __builtin_ctz(n);
}
-static inline int FindLSBSetNonZero64(uint64_t n)
+static INLINE int FindLSBSetNonZero64(uint64_t n)
{
return __builtin_ctzll(n);
}
#else /* Portable versions. */
-static inline int FindLSBSetNonZero(uint32_t n)
+static INLINE int FindLSBSetNonZero(uint32_t n)
{
int rc = 31, i, shift;
uint32_t x;
}
/* FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). */
-static inline int FindLSBSetNonZero64(uint64_t n)
+static INLINE int FindLSBSetNonZero64(uint64_t n)
{
const uint32_t bottombits = (uint32_t)n;
if (bottombits == 0) {
#define MY_CAN_FIND_PLACEHOLDERS
#define HAS_SV2OBJ
#endif
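+/* On perl >= 5.10, hash backreferences can be reached via
+ * Perl_hv_backreferences_p (used in srl_dump_sv below). */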
+#if (PERL_VERSION >= 10)
+#define HAS_HV_BACKREFS
+#endif
#include "srl_protocol.h"
#include "srl_encoder.h"
/* some static function declarations */
static void srl_dump_sv(pTHX_ srl_encoder_t *enc, SV *src);
-static void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8);
+SRL_STATIC_INLINE void srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src);
+SRL_STATIC_INLINE void srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8);
SRL_STATIC_INLINE void srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc);
SRL_STATIC_INLINE void srl_dump_av(pTHX_ srl_encoder_t *enc, AV *src, U32 refcnt);
SRL_STATIC_INLINE void srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcnt);
SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc);
SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc);
SRL_STATIC_INLINE PTABLE_t *srl_init_weak_hash(srl_encoder_t *enc);
+SRL_STATIC_INLINE HV *srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc);
+
+#define SRL_GET_STR_DEDUPER_HV(enc) ( (enc)->string_deduper_hv == NULL \
+ ? srl_init_string_deduper_hv(aTHX_ enc) \
+ : (enc)->string_deduper_hv )
-#define SRL_GET_STR_SEENHASH(enc) ( (enc)->str_seenhash == NULL \
+#define SRL_GET_STR_PTR_SEENHASH(enc) ( (enc)->str_seenhash == NULL \
? srl_init_string_hash(enc) \
: (enc)->str_seenhash )
!SvROK((src)) \
) { \
if (SvPOKp((src))) { \
- STRLEN len; \
- char *str = SvPV((src), len); \
- srl_dump_pv(aTHX_ (enc), str, len, SvUTF8((src))); \
+ srl_dump_svpv(aTHX_ (enc), (src)); \
} \
else \
if (SvNOKp((src))) { \
PTABLE_clear(enc->str_seenhash);
if (enc->weak_seenhash != NULL)
PTABLE_clear(enc->weak_seenhash);
+ if (enc->string_deduper_hv != NULL)
+ hv_clear(enc->string_deduper_hv);
enc->pos = enc->buf_start;
SRL_ENC_RESET_OPER_FLAG(enc, SRL_OF_ENCODER_DIRTY);
PTABLE_free(enc->str_seenhash);
if (enc->weak_seenhash != NULL)
PTABLE_free(enc->weak_seenhash);
+ if (enc->string_deduper_hv != NULL)
+ SvREFCNT_dec(enc->string_deduper_hv);
Safefree(enc);
}
enc->str_seenhash = NULL;
enc->ref_seenhash = NULL;
enc->snappy_workmem = NULL;
+ enc->string_deduper_hv = NULL;
return enc;
}
if ( svp && SvTRUE(*svp) )
enc->flags |= SRL_F_CROAK_ON_BLESS;
+ svp = hv_fetchs(opt, "no_bless_objects", 0);
+ if ( svp && SvTRUE(*svp) )
+ enc->flags |= SRL_F_NO_BLESS_OBJECTS;
+
svp = hv_fetchs(opt, "snappy", 0);
if ( svp && SvTRUE(*svp) )
enc->flags |= SRL_F_COMPRESS_SNAPPY;
svp = hv_fetchs(opt, "sort_keys", 0);
if ( svp && SvTRUE(*svp) ) {
- undef_unknown = 1;
enc->flags |= SRL_F_SORT_KEYS;
}
+ svp = hv_fetchs(opt, "dedupe_strings", 0);
+ if ( svp && SvTRUE(*svp) ) {
+ enc->flags |= SRL_F_DEDUPE_STRINGS;
+ }
+
svp = hv_fetchs(opt, "stringify_unknown", 0);
if ( svp && SvTRUE(*svp) ) {
if (expect_false( undef_unknown )) {
return enc->weak_seenhash;
}
+SRL_STATIC_INLINE HV *
+srl_init_string_deduper_hv(pTHX_ srl_encoder_t *enc)
+{
+ enc->string_deduper_hv = newHV();
+ return enc->string_deduper_hv;
+}
void
srl_write_header(pTHX_ srl_encoder_t *enc)
srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *src)
{
const HV *stash = SvSTASH(src);
- PTABLE_t *string_seenhash = SRL_GET_STR_SEENHASH(enc);
+ PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
if (oldoffset != 0) {
#endif
)
{
- PTABLE_t *string_seenhash = SRL_GET_STR_SEENHASH(enc);
+ PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, str);
if (oldoffset != 0) {
/* Issue COPY instead of literal hash key string */
}
}
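+/* Dump the string value of an SV. With SRL_F_DEDUPE_STRINGS set, strings
+ * longer than 3 characters that have been seen before are emitted as a COPY
+ * tag pointing back at their first occurrence. */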
+SRL_STATIC_INLINE void
+srl_dump_svpv(pTHX_ srl_encoder_t *enc, SV *src)
+{
+ STRLEN len;
+ const char *str= SvPV(src, len);
+ if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_DEDUPE_STRINGS) && len > 3 ) {
+ HV *string_deduper_hv= SRL_GET_STR_DEDUPER_HV(enc);
+ HE *dupe_offset_he= hv_fetch_ent(string_deduper_hv, src, 1, 0);
+ if (!dupe_offset_he) {
+ croak("out of memory (hv_fetch_ent returned NULL)");
+ } else {
+ SV *ofs_sv= HeVAL(dupe_offset_he);
+ if (SvIOK(ofs_sv)) {
+ /* emit copy */
+ srl_buf_cat_varint(aTHX_ enc, SRL_HDR_COPY, SvIV(ofs_sv));
+ return;
+ } else if (SvUOK(ofs_sv)) {
+ srl_buf_cat_varint(aTHX_ enc, SRL_HDR_COPY, SvUV(ofs_sv));
+ return;
+ } else {
+ sv_setuv(ofs_sv, (UV)BUF_POS_OFS(enc));
+ }
+ }
+ }
+ srl_dump_pv(aTHX_ enc, str, len, SvUTF8(src));
+}
-static void
+SRL_STATIC_INLINE void
srl_dump_pv(pTHX_ srl_encoder_t *enc, const char* src, STRLEN src_len, int is_utf8)
{
BUF_SIZE_ASSERT(enc, 1 + SRL_MAX_VARINT_LENGTH + src_len); /* overallocate a bit sometimes */
enc->pos += src_len;
}
-
-
-
/* Dumps generic SVs and delegates
* to more specialized functions for RVs, etc. */
/* TODO decide when to use the IV, when to use the PV, and when
UV weakref_ofs= 0; /* preserved between loops */
SSize_t ref_rewrite_pos= 0; /* preserved between loops - note SSize_t is a perl define */
assert(src);
+ int nobless = SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS);
if (++enc->recursion_depth == enc->max_recursion_depth) {
croak("Hit maximum recursion depth (%lu), aborting serialization",
DEBUG_ASSERT_BUF_SANE(enc);
if ( SvMAGICAL(src) ) {
SvGETMAGIC(src);
-#ifdef Perl_hv_backreferences_p
+#ifdef HAS_HV_BACKREFS
if (svt != SVt_PVHV)
#endif
mg = mg_find(src, PERL_MAGIC_backref);
}
-#ifdef Perl_hv_backreferences_p
- if (svt == SVt_PVHV)
+#ifdef HAS_HV_BACKREFS
+ if (svt == SVt_PVHV) {
backrefs= *Perl_hv_backreferences_p(aTHX_ (HV *)src);
+ if (DEBUGHACK) warn("backreferences %p", src);
+ }
#endif
if ( mg || backrefs ) {
PTABLE_t *weak_seenhash= SRL_GET_WEAK_SEENHASH(enc);
}
if (weakref_ofs != 0) {
sv_dump(src);
- assert(weakref_ofs == 0);
+ croak("Corrupted weakref? weakref_ofs=0 (this should not happen)");
}
if (SvPOKp(src)) {
#if defined(MODERN_REGEXP) && !defined(REGEXP_NO_LONGER_POK)
}
else
#endif
- {
- STRLEN len;
- char *str = SvPV(src, len);
- srl_dump_pv(aTHX_ enc, str, len, SvUTF8(src));
- }
+ srl_dump_svpv(aTHX_ enc, src);
}
else
#if defined(MODERN_REGEXP) && defined(REGEXP_NO_LONGER_POK)
}
#endif
if (SvWEAKREF(src)) {
+ if (DEBUGHACK) warn("Is weakref %p", src);
weakref_ofs= BUF_POS_OFS(enc);
srl_buf_cat_char(enc, SRL_HDR_WEAKEN);
}
}
/* FIXME reuse/ref/... should INCLUDE the bless stuff. */
/* Write bless operator with class name */
- srl_dump_classname(aTHX_ enc, referent);
+ if (!nobless)
+ srl_dump_classname(aTHX_ enc, referent);
}
srl_buf_cat_char(enc, SRL_HDR_REFN);
refsv= src;
src= referent;
+ if (DEBUGHACK) warn("Going to redo %p", src);
goto redo_dump;
}
else
UV max_recursion_depth; /* Configurable limit on the number of recursive calls we're willing to make */
UV recursion_depth; /* current Perl-ref recursion depth */
- ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */
+ ptable_ptr ref_seenhash; /* ptr table for avoiding circular refs */
ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */
- ptable_ptr str_seenhash; /* ptr table for issuing COPY commands */
+ ptable_ptr str_seenhash; /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */
+ HV *string_deduper_hv; /* track strings we have seen before, by content */
void *snappy_workmem; /* lazily allocated if and only if using Snappy */
IV snappy_threshold; /* do not compress things smaller than this even if Snappy enabled */
/* Will default to "on". If set, hash keys will be shared using COPY.
* Corresponds to the inverse of constructor option "no_shared_hashkeys" */
-#define SRL_F_SHARED_HASHKEYS 1UL
+#define SRL_F_SHARED_HASHKEYS 0x00001UL
/* If set, then we're using the OO interface and we shouldn't destroy the
* encoder struct during SAVEDESTRUCTOR_X time */
-#define SRL_F_REUSE_ENCODER 2UL
+#define SRL_F_REUSE_ENCODER 0x00002UL
/* If set in flags, then we rather croak than serialize an object.
* Corresponds to the 'croak_on_bless' option to the Perl constructor. */
-#define SRL_F_CROAK_ON_BLESS 4UL
+#define SRL_F_CROAK_ON_BLESS 0x00004UL
/* If set in flags, then we will emit <undef> for all data types
* that aren't supported. Corresponds to the 'undef_unknown' option. */
-#define SRL_F_UNDEF_UNKNOWN 8UL
+#define SRL_F_UNDEF_UNKNOWN 0x00008UL
/* If set in flags, then we will stringify (SvPV) all data types
* that aren't supported. Corresponds to the 'stringify_unknown' option. */
-#define SRL_F_STRINGIFY_UNKNOWN 16UL
+#define SRL_F_STRINGIFY_UNKNOWN 0x00010UL
/* If set in flags, then we warn() when trying to serialize an unsupported
* data structure. Applies only if stringify_unknown or undef_unknown are
* set since we otherwise croak. Corresponds to the 'warn_unknown' option. */
-#define SRL_F_WARN_UNKNOWN 32UL
+#define SRL_F_WARN_UNKNOWN 0x00020UL
/* WARNING: This is different from the protocol bit SRL_PROTOCOL_ENCODING_SNAPPY in that it's
* a flag on the encoder struct indicating that we want to use Snappy. */
-#define SRL_F_COMPRESS_SNAPPY 64UL
-#define SRL_F_COMPRESS_SNAPPY_INCREMENTAL 128UL
+#define SRL_F_COMPRESS_SNAPPY 0x00040UL
+#define SRL_F_COMPRESS_SNAPPY_INCREMENTAL 0x00080UL
/* Only meaningful if SRL_F_WARN_UNKNOWN also set. If this one is set, then we don't warn
* if the unsupported item has string overloading. */
-#define SRL_F_NOWARN_UNKNOWN_OVERLOAD 256UL
+#define SRL_F_NOWARN_UNKNOWN_OVERLOAD 0x00100UL
/* If set in flags, then hash keys are sorted before being serialized.
 * Corresponds to the 'sort_keys' option. */
-#define SRL_F_SORT_KEYS 512UL
+#define SRL_F_SORT_KEYS 0x00200UL
+
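+/* If set in flags, then we dedupe longer string values by emitting COPY tags.
+ * Corresponds to the 'dedupe_strings' option. */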
+#define SRL_F_DEDUPE_STRINGS 0x00400UL
+
+/* If set in flags, then we serialize objects without class information.
+ * Corresponds to the 'no_bless_objects' flag found in the Decoder. */
+#define SRL_F_NO_BLESS_OBJECTS 0x00800UL
/* Set while the encoder is in active use / dirty */
#define SRL_OF_ENCODER_DIRTY 1UL
run_tests("plain");
run_tests("no_shared_hk", {no_shared_hashkeys => 1});
+run_tests("dedupe_strings", {dedupe_strings => 1});
done_testing();
sub run_tests {
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Test::More tests => 1;
+use Sereal::Encoder qw(encode_sereal);
+
+eval { encode_sereal(\1, { sort_keys => 1, stringify_unknown => 1 }); };
+ok !$@, "We shouldn't die on sort_keys combined with stringify_unknown";
+
+
--- /dev/null
+#!perl
+use strict;
+use warnings;
+use Sereal::Encoder;
+use File::Spec;
+use Scalar::Util qw( blessed );
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+ lib->import('lib')
+ if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+ plan skip_all => 'Did not find right version of encoder';
+}
+else {
+ my $class = 'MyFoo';
+ my %hash = ( x => 1 );
+ my $object = bless( \%hash, $class );
+ my $dec = Sereal::Decoder->new();
+
+ # do not bless anything
+ {
+ my $enc = Sereal::Encoder->new({ no_bless_objects => 1 });
+ my $blob = $enc->encode( $object );
+
+ my $data = $dec->decode( $blob );
+
+ ok( ref( $data ) && !blessed( $data ), 'reference without class' );
+ is_deeply( $data, \%hash, 'same structure' );
+ }
+
+ # normally do the blessing
+ {
+ my $enc = Sereal::Encoder->new();
+ my $blob = $enc->encode( $object );
+
+ my $data = $dec->decode( $blob );
+
+ is_deeply( $data, $object, 'same structure' );
+ isa_ok( $data, $class, 'same class' );
+ }
+}
+
+done_testing();
+
};
$enc->encode(["foo", sub{}]);
};
- ok($die_run == 2, "__DIE__ called, encode 2 did not die");
+ ok($die_run == 2, "__DIE__ called, encode 2 did not die ($die_run)");
}
# github Sereal/Sereal issue 7 regression test:
['snappy', { snappy => 1 } ],
['snappy_incr', { snappy_incr => 1 } ],
['sort_keys', { sort_keys => 1 } ],
+ ['dedupe_strings', { dedupe_strings => 1 } ],
) {
run_roundtrip_tests_internal(@$opt);
}