From: gregor herrmann <gregoa@debian.org>
Date: Sun, 29 Nov 2015 14:04:25 +0000 (+0100)
Subject: Imported Upstream version 3.008
X-Git-Tag: archive/raspbian/4.017+ds-1+rpi1~1^2~3^2~10
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=0b784a6d5ca047063ff50de0ee389b38a6bbd80b;p=libsereal-encoder-perl.git

Imported Upstream version 3.008
---

diff --git a/Changes b/Changes
index 4c81c17..0d32c09 100644
--- a/Changes
+++ b/Changes
@@ -5,6 +5,48 @@ Revision history for Perl extension Sereal-Encoder
 *          of the decoder before upgrading to version 3 of the *
 *          encoder!                                            *
 ****************************************************************
+3.008 Nov 27 2015
+  * FIX: Make sure that reserializing a data structure created
+    using alias_varint_under does not produce a corrupted dataset.
+    Thanks to Iskra for the report.
+  * FIX: Precedence issue related to sort keys options. Thanks
+    to Petr PÃ­saÅ <ppisar@redhat.com> for the report and fix.
+  * FIX: Win32 build issues, thanks to bulk88 for help.
+
+3.007 Nov 26 2015
+  * FIX: sorting should now work with tied hashes, and
+    be in general faster.
+  * CHANGED: Sort order for sort_keys=1 is now defined to be
+    "in order by length of bytes, then by byte order of the 
+    underlying string, then by utd8ness, with non-utf8 first". 
+    This sort order was chosen because it requires the least 
+    operations to perform in the most cases, has a bounded number
+    of cases where we would have to create any temporary SV's or 
+    to do operations that require us to introspect codepoints in
+    utf8 strings. IOW purely performance. Note that so long
+    as you compare data created with the same sort_keys setting
+    you will ALWAYS get the same order *in process*, regardless
+    of what you choose. Backwards compatible support for  the
+    old order is available via sort_keys=3.
+    
+    ADDED: sort_keys=2 to provide "perl cmp order"
+           (Its just there because it is easy to do, not for any
+            good reason.)
+    ADDED: sort_keys=3 to provide "rev perl cmp order"
+
+    Legacy sort order was equivalent to sort_keys=3, use this 
+    option *ONLY* if you are extremely sensitive to changes in the
+    sorted order (or "canonical form").
+
+  * [DECODER] Build fixes for boxes without any Sereal installed (it seems
+    to be common on certain types of smokers, and not on others).
+  * Fix issues with serializing blessed scalars where we see the
+    blessed scalar before we see the reference to it.
+  * Handle PVLV undefs
+  * Build fixes for 5.8.9
+  * Doc patches
+  * Optimizations
+
 3.006 Nov 14 2015
   * [DECODER] fix segfaults from heavily corrupted data. Guards
     against various pathological cases which could cause segfaults
diff --git a/Encoder.xs b/Encoder.xs
index 4c54de8..b7d8e5a 100644
--- a/Encoder.xs
+++ b/Encoder.xs
@@ -5,7 +5,7 @@
 #include "perl.h"
 #include "XSUB.h"
 
-#define NEED_newSV_type
+#define NEED_newSV_type_GLOBAL
 #include "ppport.h"
 
 #include "srl_encoder.h"
@@ -207,6 +207,12 @@ DESTROY(enc)
   CODE:
     srl_destroy_encoder(aTHX_ enc);
 
+U32
+flags(enc)
+    srl_encoder_t *enc;
+  CODE:
+    RETVAL = enc->flags;
+  OUTPUT: RETVAL
 
 void
 encode_sereal(src, opt = NULL)
@@ -278,3 +284,5 @@ test()
     }
     PTABLE_iter_free(iter);
     PTABLE_free(tbl);
+
+
diff --git a/MANIFEST b/MANIFEST
index e16aeda..e05928f 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5,6 +5,7 @@ author_tools/freeze_thaw_timing.pl
 author_tools/hobodecoder.pl
 author_tools/numeric_str_length.c
 author_tools/stringify_test.c
+author_tools/update_flag_consts.pl
 author_tools/update_from_header.pl
 author_tools/valgrind.supp
 Changes
@@ -20,6 +21,7 @@ miniz.c
 miniz.h
 ppport.h
 ptable.h
+qsort.h
 snappy/csnappy.h
 snappy/csnappy_compat.h
 snappy/csnappy_compress.c
@@ -54,6 +56,7 @@ t/020_sort_keys.t
 t/021_sort_keys_option.t
 t/022_canonical_refs.t
 t/030_canonical_vs_test_deep.t
+t/040_tied_hash.t
 t/110_nobless.t
 t/120_hdr_data.t
 t/130_freezethaw.t
@@ -86,6 +89,8 @@ t/700_roundtrip/v3/snappy_canon.t
 t/700_roundtrip/v3/snappy_incr.t
 t/700_roundtrip/v3/snappy_incr_canon.t
 t/700_roundtrip/v3/sort_keys.t
+t/700_roundtrip/v3/sort_keys_perl.t
+t/700_roundtrip/v3/sort_keys_perl_rev.t
 t/700_roundtrip/v3/zlib.t
 t/700_roundtrip/v3/zlib_force.t
 t/800_threads.t
diff --git a/META.json b/META.json
index f300ee5..1edb16b 100644
--- a/META.json
+++ b/META.json
@@ -4,7 +4,7 @@
       "Steffen Mueller <smueller@cpan.org>, Yves Orton <yves@cpan.org>"
    ],
    "dynamic_config" : 1,
-   "generated_by" : "ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001",
+   "generated_by" : "ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380",
    "license" : [
       "perl_5"
    ],
@@ -19,37 +19,6 @@
          "inc"
       ]
    },
-   "prereqs" : {
-      "build" : {
-         "requires" : {
-            "ExtUtils::ParseXS" : "2.21",
-            "File::Find" : "0",
-            "File::Path" : "0"
-         }
-      },
-      "configure" : {
-         "requires" : {
-            "ExtUtils::MakeMaker" : "7.0"
-         }
-      },
-      "runtime" : {
-         "requires" : {
-            "XSLoader" : "0",
-            "perl" : "5.008"
-         }
-      },
-      "test" : {
-         "requires" : {
-            "Data::Dumper" : "0",
-            "File::Spec" : "0",
-            "Scalar::Util" : "0",
-            "Sereal::Decoder" : "3.00",
-            "Test::LongString" : "0",
-            "Test::More" : "0.88",
-            "Test::Warn" : "0"
-         }
-      }
-   },
    "release_status" : "stable",
    "resources" : {
       "bugtracker" : {
@@ -60,5 +29,5 @@
          "url" : "git://github.com/Sereal/Sereal.git"
       }
    },
-   "version" : "3.006"
+   "version" : "3.008"
 }
diff --git a/META.yml b/META.yml
index 851d595..7c0ec83 100644
--- a/META.yml
+++ b/META.yml
@@ -2,34 +2,19 @@
 abstract: 'Fast, compact, powerful binary serialization'
 author:
   - 'Steffen Mueller <smueller@cpan.org>, Yves Orton <yves@cpan.org>'
-build_requires:
-  Data::Dumper: '0'
-  ExtUtils::ParseXS: '2.21'
-  File::Find: '0'
-  File::Path: '0'
-  File::Spec: '0'
-  Scalar::Util: '0'
-  Sereal::Decoder: '3.00'
-  Test::LongString: '0'
-  Test::More: '0.88'
-  Test::Warn: '0'
-configure_requires:
-  ExtUtils::MakeMaker: '7.0'
+build_requires: {}
 dynamic_config: 1
-generated_by: 'ExtUtils::MakeMaker version 7.0401, CPAN::Meta::Converter version 2.150001'
+generated_by: 'ExtUtils::MakeMaker version 6.66, CPAN::Meta::Converter version 2.133380'
 license: perl
 meta-spec:
   url: http://module-build.sourceforge.net/META-spec-v1.4.html
-  version: '1.4'
+  version: 1.4
 name: Sereal-Encoder
 no_index:
   directory:
     - t
     - inc
-requires:
-  XSLoader: '0'
-  perl: '5.008'
 resources:
   bugtracker: https://github.com/Sereal/Sereal/issues
   repository: git://github.com/Sereal/Sereal.git
-version: '3.006'
+version: 3.008
diff --git a/author_tools/hobodecoder.pl b/author_tools/hobodecoder.pl
index a1f920d..b29e692 100644
--- a/author_tools/hobodecoder.pl
+++ b/author_tools/hobodecoder.pl
@@ -4,6 +4,7 @@ use warnings;
 use Data::Dumper;
 
 use Getopt::Long qw(GetOptions);
+use Encode qw(encode_utf8 decode_utf8);
 our @constants;
 no warnings 'recursion';
 BEGIN {
@@ -156,13 +157,14 @@ sub parse_sv {
     my $len = $o;
     my $str = _chop_data_prefix( $len );
     $done .= $str;
-    printf "SHORT_BINARY(%u): '%s'\n", $len, $str;
+    printf "SHORT_BINARY(%u): '%s' (%s)\n", $len, encode_utf8($str), unpack("H*", $str);
   }
   elsif ($o == SRL_HDR_BINARY || $o == SRL_HDR_STR_UTF8) {
     my $l = varint();
     my $str = _chop_data_prefix( $l ); # fixme UTF8
     $done .= $str;
-    printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s'\n", $l, $str);
+    $str= decode_utf8($str) if $o == SRL_HDR_STR_UTF8;
+    printf( ($o == SRL_HDR_STR_UTF8 ? "STR_UTF8" : "BINARY")."(%u): '%s' (%s)\n", $l, encode_utf8($str), unpack("H*", encode_utf8($str)));
   }
   elsif ($o == SRL_HDR_FLOAT) {
     printf "FLOAT(%f)\n", parse_float();
diff --git a/author_tools/update_flag_consts.pl b/author_tools/update_flag_consts.pl
new file mode 100644
index 0000000..efe39fe
--- /dev/null
+++ b/author_tools/update_flag_consts.pl
@@ -0,0 +1,69 @@
+use strict;
+use warnings;
+use Data::Dumper;
+
+my %sets;
+my %flag_consts;
+my %flag_names;
+my (@flags, @static, @volatile);
+my $file= "srl_encoder.h";
+{
+    open my $fh, "<", $file
+        or die "Failed to open '$file' for read: $!";
+
+    while (<$fh>) {
+        if ( m/#define (SRL_F_(\w+))\s+(.*)/ || /(\w+(VOLATILE_FLAGS))\s+(.*)/ ) {
+            #print;
+            my $full_name= $1;
+            my $name= $2;
+            my $value= $3;
+            $name=~s/_?ENCODER_?//g;
+            $flag_names{$full_name}= $name;
+            if ($value=~s/UL\z//) {
+                $flag_consts{$full_name}= 0+eval $value;
+            } else {
+                $value =~ s/(SRL_F_\w+)/\$flag_consts{$1}/g;
+                $sets{"SRL_F_ENCODER_" . $name}= 0+eval $value;
+            }
+        }
+    }
+    foreach my $key ( sort { $flag_consts{$a} <=> $flag_consts{$b} } keys %flag_consts ) {
+        if (defined $sets{SRL_F_ENCODER_VOLATILE_FLAGS}) {
+            my $is_volatile= $flag_consts{$key} & $sets{SRL_F_ENCODER_VOLATILE_FLAGS};
+            push @static,   $is_volatile ? undef : $flag_names{$key};
+            push @volatile, $is_volatile ? $flag_names{$key} : undef;
+        }
+        push @flags, $flag_names{$key};
+    }
+}
+
+my %consts= (%sets, %flag_consts);
+$consts{_FLAG_NAME}= \@flags;
+if (@volatile) {
+    $consts{_FLAG_NAME_VOLATILE}= \@volatile;
+    $consts{_FLAG_NAME_STATIC}= \@static;
+}
+
+my $infile= "lib/Sereal/Encoder.pm";
+my $outfile= "$infile.new";
+open my $fh,"<", $infile
+    or die "Failed to read '$infile': $!";
+open my $ofh, ">", $outfile
+    or die "Failed to write to '$outfile': $!";
+while (<$fh>) {
+    if (/#begin generated/) {
+        print $ofh $_;
+        my $s= Data::Dumper->new([\%consts])->Sortkeys(1)->Terse(1)->Dump();
+        chop($s);
+        $s.="; #end generated\n";
+        print $ofh $s;
+    }
+    unless(/#begin generated/ ... /#end generated/) {
+        print $ofh $_;
+    }
+}
+close $ofh;
+close $fh;
+rename $infile, "$infile.bak" or die "Failed to rename '$infile' to '$infile.bak': $!";
+rename $outfile, $infile or die "Failed to rename '$outfile' to '$infile': $!";
+
diff --git a/inc/Sereal/BuildTools.pm b/inc/Sereal/BuildTools.pm
index 7310fb0..d4e89d7 100644
--- a/inc/Sereal/BuildTools.pm
+++ b/inc/Sereal/BuildTools.pm
@@ -3,10 +3,14 @@ use strict;
 use warnings;
 
 use Config;
+use constant OSNAME => $^O;
 
 sub link_files {
   my $shared_dir = shift;
-  my $exlude_tests = shift;
+  my $do_tests = shift || "";
+  my $exclude_tests= $do_tests eq "without_tests";
+  my $tests_only= $do_tests eq "tests_only";
+
   # This fires from a git source tree only.
   # Right now, all devs are on Linux. Feel free to make portable.
   eval {
@@ -20,7 +24,8 @@ sub link_files {
           my $f = $_;
           s/^\Q$shared_dir\E\/?// or die $_;
           return unless $_;
-          return if $exlude_tests && m#^/?t/#;
+          return if $exclude_tests && m#^/?t/#;
+          return if $tests_only && !m#^/?t/#;
 
           if (-d $f) {
             File::Path::mkpath($_)
@@ -32,7 +37,14 @@ sub link_files {
             my $ref = join "/", ("..") x scalar(@d);
             my $subd = join "/", @d;
             chdir $subd if length($ref);
-            symlink(join("/", grep length, $ref, $shared_dir, $subd, $fname), $fname);
+            my $srcfname = join("/", grep length, $ref, $shared_dir, $subd, $fname);
+            if (OSNAME eq 'MSWin32') {
+              die "link($srcfname, $fname) failed: $!"
+                unless link($srcfname, $fname); #only NTFS implements it
+            }
+            else {
+              symlink($srcfname, $fname);
+            }
             chdir($ref) if length($ref);
           }
         },
diff --git a/lib/Sereal/Encoder.pm b/lib/Sereal/Encoder.pm
index 5c1f1d7..431a131 100644
--- a/lib/Sereal/Encoder.pm
+++ b/lib/Sereal/Encoder.pm
@@ -5,7 +5,7 @@ use warnings;
 use Carp qw/croak/;
 use XSLoader;
 
-our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed decoders!
+our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed decoders!
 our $XS_VERSION = $VERSION; $VERSION= eval $VERSION;
 
 # not for public consumption, just for testing.
@@ -16,9 +16,52 @@ sub _test_compat {return(@$TestCompat, $VERSION)}
 # Make sure to keep these constants in sync with the C code in srl_encoder.c.
 # I know they could be exported from C using things like ExtUtils::Constant,
 # but that's too much of a hassle for just three numbers.
-use constant SRL_UNCOMPRESSED => 0;
-use constant SRL_SNAPPY       => 1;
-use constant SRL_ZLIB         => 2;
+use constant {
+    SRL_UNCOMPRESSED => 0,
+    SRL_SNAPPY       => 1,
+    SRL_ZLIB         => 2,
+};
+use constant #begin generated
+{
+  'SRL_F_ALIASED_DEDUPE_STRINGS' => 4096,
+  'SRL_F_CANONICAL_REFS' => 32768,
+  'SRL_F_COMPRESS_SNAPPY' => 64,
+  'SRL_F_COMPRESS_SNAPPY_INCREMENTAL' => 128,
+  'SRL_F_COMPRESS_ZLIB' => 256,
+  'SRL_F_CROAK_ON_BLESS' => 4,
+  'SRL_F_DEDUPE_STRINGS' => 2048,
+  'SRL_F_ENABLE_FREEZE_SUPPORT' => 16384,
+  'SRL_F_NOWARN_UNKNOWN_OVERLOAD' => 512,
+  'SRL_F_NO_BLESS_OBJECTS' => 8192,
+  'SRL_F_REUSE_ENCODER' => 2,
+  'SRL_F_SHARED_HASHKEYS' => 1,
+  'SRL_F_SORT_KEYS' => 1024,
+  'SRL_F_SORT_KEYS_PERL' => 65536,
+  'SRL_F_SORT_KEYS_PERL_REV' => 131072,
+  'SRL_F_STRINGIFY_UNKNOWN' => 16,
+  'SRL_F_UNDEF_UNKNOWN' => 8,
+  'SRL_F_WARN_UNKNOWN' => 32,
+  '_FLAG_NAME' => [
+                    'SHARED_HASHKEYS',
+                    'REUSE',
+                    'CROAK_ON_BLESS',
+                    'UNDEF_UNKNOWN',
+                    'STRINGIFY_UNKNOWN',
+                    'WARN_UNKNOWN',
+                    'COMPRESS_SNAPPY',
+                    'COMPRESS_SNAPPY_INCREMENTAL',
+                    'COMPRESS_ZLIB',
+                    'NOWARN_UNKNOWN_OVERLOAD',
+                    'SORT_KEYS',
+                    'DEDUPE_STRINGS',
+                    'ALIASED_DEDUPE_STRINGS',
+                    'NO_BLESS_OBJECTS',
+                    'ENABLE_FREEZE_SUPPORT',
+                    'CANONICAL_REFS',
+                    'SORT_KEYS_PERL',
+                    'SORT_KEYS_PERL_REV'
+                  ]
+}; #end generated
 
 use Exporter 'import';
 our @EXPORT_OK = qw(
@@ -37,6 +80,29 @@ sub CLONE_SKIP {1}
 
 XSLoader::load('Sereal::Encoder', $XS_VERSION);
 
+sub encode_to_file {
+    my ($self, $file, $struct, $append)= @_;
+    my $mode= $append ? ">>" : ">";
+    open my $fh, $mode, $file
+        or die "Failed to open '$file' for " . ($append ? "append" : "write") . ": $!";
+    print $fh $self->encode($struct)
+        or die "Failed to print to '$file': $!";
+    close $fh
+        or die "Failed to close '$file': $!";
+}
+
+my $flags= sub {
+    my ($int, $ary)= @_;
+    return map {
+        ($ary->[$_] and $int & (1 << $_)) ? $ary->[$_] : ()
+    } (0..$#$ary);
+};
+
+sub flag_names {
+    my ($self, $val)= @_;
+    return $flags->($val // $self->flags, _FLAG_NAME);
+}
+
 1;
 
 __END__
@@ -275,11 +341,23 @@ variables on use, and some of its rules are a little arcane (for instance utf8
 keys), and so two hashes that might appear to be the same might still produce
 different output as far as Sereal is concerned.
 
-The thusly allocated encoder object and its output buffer will be reused
-between invocations of C<encode()>, so hold on to it for an efficiency
-gain if you plan to serialize multiple similar data structures, but destroy
-it if you serialize a single very large data structure just once to free
-the memory.
+As of 3.006_007 (prerelease candidate for 3.007) the sort order has been changed
+to the following: order by length of keys (in bytes) ascending, then by byte
+order of the raw underlying string, then by utf8ness, with non-utf8 first. This
+order was chosen because it is the most efficient to implement, both in terms
+of memory and time. This sort order is enabled when sort_keys is set to 1.
+
+You may also produce output in Perl "cmp" order, by setting sort_keys to 2.
+And for backwards compatibility you may also produce output in reverse Perl
+"cmp" order by setting sort_keys to 3. Prior to 3.006_007 this was the
+only sort order possible, although it was not explicitly defined what it was.
+
+Note that comparatively speaking both of the "cmp" sort orders are slow and
+memory inefficient. Unless you have a really good reason stick to the default
+which is fast and as lean as possible.
+
+Unless you are concerned with "cross process canonical representation" then
+it doesn't matter what option you choose. 
 
 See L</CANONICAL REPRESENTATION> for why you might want to use this, and
 for the various caveats involved.
@@ -736,7 +814,7 @@ Sereal currently will choose the *string* value when it detects these items.
 
 It is possible that a future release of the protocol will fix these issues.
 
-=back 4
+=back
 
 =head1 BUGS, CONTACT AND SUPPORT
 
diff --git a/lib/Sereal/Encoder/Constants.pm b/lib/Sereal/Encoder/Constants.pm
index 5394539..bd33273 100644
--- a/lib/Sereal/Encoder/Constants.pm
+++ b/lib/Sereal/Encoder/Constants.pm
@@ -4,7 +4,7 @@ use warnings;
 require Exporter;
 our @ISA= qw(Exporter);
 
-our $VERSION = '3.006'; # Don't forget to update the TestCompat set for testing against installed encoders!
+our $VERSION = '3.008'; # Don't forget to update the TestCompat set for testing against installed encoders!
 
 our (@EXPORT_OK, %DEFINE, %TAG_INFO_HASH, @TAG_INFO_ARRAY);
 
diff --git a/qsort.h b/qsort.h
new file mode 100644
index 0000000..07480d5
--- /dev/null
+++ b/qsort.h
@@ -0,0 +1,290 @@
+/* $Id: qsort.h,v 1.5 2008-01-28 18:16:49 mjt Exp $
+ * Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* in-line qsort implementation.  Differs from traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing an address
+ * of a comparison routine to the function, it is possible to inline
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ *  #include "iqsort.h"
+ *  #define islt(a,b) (strcmp((*a),(*b))<0)
+ *  char *arr[];
+ *  int n;
+ *  QSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ *  QSORT(TYPE,BASE,NELT,ISLT)
+ *  1) type of each element, TYPE,
+ *  2) address of the beginning of the array, of type TYPE*,
+ *  3) number of elements in the array, and
+ *  4) comparision routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ *  qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whenever one element is less than
+ * another, no other conditions (greather than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ *  #define islt(a,b) ((*a)<(*b))
+ *  QSORT(int, arr, n, islt)
+ *
+ * The macro could be used to implement a sorting function (see examples
+ * below), or to implement the sorting algorithm inline.  That is, either
+ * create a sorting function and use it whenever you want to sort something,
+ * or use QSORT() macro directly instead a call to such routine.  Note that
+ * the macro expands to quite some code (compiled size of int qsort on x86
+ * is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ *   QSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ *   QSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting array of structures:
+ *
+ * struct elt {
+ *   int key;
+ *   ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ *  QSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap two items pointed to by A and B using temporary buffer t. */
+#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define _QSORT_MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations
+ * (inlined in QSORT).
+typedef struct {
+  QSORT_TYPE *_lo, *_hi;
+} qsort_stack_node;
+ */
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+   log(MAX_THRESH)).  Since total_elements has type unsigned, we get as
+   upper bound for log (total_elements):
+   bits per byte (CHAR_BIT) * sizeof(unsigned).  */
+#define _QSORT_STACK_SIZE	(8 * sizeof(unsigned))
+#define _QSORT_PUSH(top, low, high)	\
+	(((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define	_QSORT_POP(low, high, top)	\
+	((--top, (low = top->_lo), (high = top->_hi)))
+#define	_QSORT_STACK_NOT_EMPTY	(_stack < _top)
+
+
+/* Order size using quicksort.  This implementation incorporates
+   four optimizations discussed in Sedgewick:
+
+   1. Non-recursive, using an explicit stack of pointer that store the
+      next array partition to sort.  To save time, this maximum amount
+      of space required to store an array of SIZE_MAX is allocated on the
+      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
+      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+      Pretty cheap, actually.
+
+   2. Chose the pivot element using a median-of-three decision tree.
+      This reduces the probability of selecting a bad pivot value and
+      eliminates certain extraneous comparisons.
+
+   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+      insertion sort to order the MAX_THRESH items within each partition.
+      This is a big win, since insertion sort is faster for small, mostly
+      sorted array segments.
+
+   4. The larger of the two sub-partitions is always pushed onto the
+      stack first, with the algorithm then concentrating on the
+      smaller partition.  This *guarantees* no more than log (total_elems)
+      stack size is needed (actually O(1) in this case)!  */
+
+/* The main code starts here... */
+#define QSORT(QSORT_TYPE,QSORT_BASE,QSORT_NELT,QSORT_LT)		\
+{									\
+  QSORT_TYPE *const _base = (QSORT_BASE);				\
+  const unsigned _elems = (QSORT_NELT);					\
+  QSORT_TYPE _hold;							\
+									\
+  /* Don't declare two variables of type QSORT_TYPE in a single		\
+   * statement: eg `TYPE a, b;', in case if TYPE is a pointer,		\
+   * expands to `type* a, b;' wich isn't what we want.			\
+   */									\
+									\
+  if (_elems > _QSORT_MAX_THRESH) {					\
+    QSORT_TYPE *_lo = _base;						\
+    QSORT_TYPE *_hi = _lo + _elems - 1;					\
+    struct {								\
+      QSORT_TYPE *_hi; QSORT_TYPE *_lo;					\
+    } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1;			\
+									\
+    while (_QSORT_STACK_NOT_EMPTY) {					\
+      QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr;			\
+									\
+      /* Select median value from among LO, MID, and HI. Rearrange	\
+         LO and HI so the three values are sorted. This lowers the	\
+         probability of picking a pathological pivot value and		\
+         skips a comparison for both the LEFT_PTR and RIGHT_PTR in	\
+         the while loops. */						\
+									\
+      QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1);			\
+									\
+      if (QSORT_LT (_mid, _lo))						\
+        _QSORT_SWAP (_mid, _lo, _hold);					\
+      if (QSORT_LT (_hi, _mid))	{					\
+        _QSORT_SWAP (_mid, _hi, _hold);					\
+        if (QSORT_LT (_mid, _lo))					\
+          _QSORT_SWAP (_mid, _lo, _hold);				\
+      } 								\
+									\
+      _left_ptr  = _lo + 1;						\
+      _right_ptr = _hi - 1;						\
+									\
+      /* Here's the famous ``collapse the walls'' section of quicksort.	\
+         Gotta like those tight inner loops!  They are the main reason	\
+         that this algorithm runs much faster than others. */		\
+      do {								\
+        while (QSORT_LT (_left_ptr, _mid))				\
+         ++_left_ptr;							\
+									\
+        while (QSORT_LT (_mid, _right_ptr))				\
+          --_right_ptr;							\
+									\
+        if (_left_ptr < _right_ptr) {					\
+          _QSORT_SWAP (_left_ptr, _right_ptr, _hold);			\
+          if (_mid == _left_ptr)					\
+            _mid = _right_ptr;						\
+          else if (_mid == _right_ptr)					\
+            _mid = _left_ptr;						\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+        }								\
+        else if (_left_ptr == _right_ptr) {				\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+          break;							\
+        }								\
+      } while (_left_ptr <= _right_ptr);				\
+									\
+     /* Set up pointers for next iteration.  First determine whether	\
+        left and right partitions are below the threshold size.  If so,	\
+        ignore one or both.  Otherwise, push the larger partition's	\
+        bounds on the stack and continue sorting the smaller one. */	\
+									\
+      if (_right_ptr - _lo <= _QSORT_MAX_THRESH) {			\
+        if (_hi - _left_ptr <= _QSORT_MAX_THRESH)			\
+          /* Ignore both small partitions. */				\
+          _QSORT_POP (_lo, _hi, _top);					\
+        else								\
+          /* Ignore small left partition. */				\
+          _lo = _left_ptr;						\
+      }									\
+      else if (_hi - _left_ptr <= _QSORT_MAX_THRESH)			\
+        /* Ignore small right partition. */				\
+        _hi = _right_ptr;						\
+      else if (_right_ptr - _lo > _hi - _left_ptr) {			\
+        /* Push larger left partition indices. */			\
+        _QSORT_PUSH (_top, _lo, _right_ptr);				\
+        _lo = _left_ptr;						\
+      }									\
+      else {								\
+        /* Push larger right partition indices. */			\
+        _QSORT_PUSH (_top, _left_ptr, _hi);				\
+        _hi = _right_ptr;						\
+      }									\
+    }									\
+  }									\
+									\
+  /* Once the BASE array is partially sorted by quicksort the rest	\
+     is completely sorted using insertion sort, since this is efficient	\
+     for partitions below MAX_THRESH size. BASE points to the		\
+     beginning of the array to sort, and END_PTR points at the very	\
+     last element in the array (*not* one beyond it!). */		\
+									\
+  {									\
+    QSORT_TYPE *const _end_ptr = _base + _elems - 1;			\
+    QSORT_TYPE *_tmp_ptr = _base;					\
+    register QSORT_TYPE *_run_ptr;					\
+    QSORT_TYPE *_thresh;						\
+									\
+    _thresh = _base + _QSORT_MAX_THRESH;				\
+    if (_thresh > _end_ptr)						\
+      _thresh = _end_ptr;						\
+									\
+    /* Find smallest element in first threshold and place it at the	\
+       array's beginning.  This is the smallest array element,		\
+       and the operation speeds up insertion sort's inner loop. */	\
+									\
+    for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr)	\
+      if (QSORT_LT (_run_ptr, _tmp_ptr))				\
+        _tmp_ptr = _run_ptr;						\
+									\
+    if (_tmp_ptr != _base)						\
+      _QSORT_SWAP (_tmp_ptr, _base, _hold);				\
+									\
+    /* Insertion sort, running from left-hand-side			\
+     * up to right-hand-side.  */					\
+									\
+    _run_ptr = _base + 1;						\
+    while (++_run_ptr <= _end_ptr) {					\
+      _tmp_ptr = _run_ptr - 1;						\
+      while (QSORT_LT (_run_ptr, _tmp_ptr))				\
+        --_tmp_ptr;							\
+									\
+      ++_tmp_ptr;							\
+      if (_tmp_ptr != _run_ptr) {					\
+        QSORT_TYPE *_trav = _run_ptr + 1;				\
+        while (--_trav >= _run_ptr) {					\
+          QSORT_TYPE *_hi; QSORT_TYPE *_lo;				\
+          _hold = *_trav;						\
+									\
+          for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo)		\
+            *_hi = *_lo;						\
+          *_hi = _hold;							\
+        }								\
+      }									\
+    }									\
+  }									\
+									\
+}
diff --git a/srl_common.h b/srl_common.h
index d51d309..97eb0c3 100644
--- a/srl_common.h
+++ b/srl_common.h
@@ -59,9 +59,9 @@
  * http://en.wikipedia.org/wiki/FLAGS_register_(computing)
  */
 #ifdef SRL_X86_OR_X64_CPU
-#  if __x86_64__ || __x86_64
+#  if defined(__x86_64__) || defined(__x86_64)
 #    define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%rsp)\npopf")
-#  elif __i386__ || __i386
+#  elif defined(__i386__) || defined(__i386)
 #    define SRL_TRY_ENABLE_STRICT_ALIGN() asm("pushf\norl $0x40000, (%esp)\npopf")
 #  endif
 #else
diff --git a/srl_encoder.c b/srl_encoder.c
index fb1499a..f6ac87a 100644
--- a/srl_encoder.c
+++ b/srl_encoder.c
@@ -52,6 +52,7 @@ extern "C" {
 #include "ptable.h"
 #include "srl_buffer.h"
 #include "srl_compress.h"
+#include "qsort.h"
 
 /* The ENABLE_DANGEROUS_HACKS (passed through from ENV via Makefile.PL) enables
  * optimizations that may make the code so cozy with a particular version of the
@@ -91,7 +92,7 @@ SRL_STATIC_INLINE void srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcnt
 SRL_STATIC_INLINE void srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys);
 SRL_STATIC_INLINE void srl_dump_nv(pTHX_ srl_encoder_t *enc, SV *src);
 SRL_STATIC_INLINE void srl_dump_ivuv(pTHX_ srl_encoder_t *enc, SV *src);
-SRL_STATIC_INLINE void srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement);
+SRL_STATIC_INLINE int srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement);
 SRL_STATIC_INLINE SV *srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent);
 SRL_STATIC_INLINE PTABLE_t *srl_init_string_hash(srl_encoder_t *enc);
 SRL_STATIC_INLINE PTABLE_t *srl_init_ref_hash(srl_encoder_t *enc);
@@ -123,6 +124,8 @@ SRL_STATIC_INLINE srl_encoder_t *srl_dump_data_structure(pTHX_ srl_encoder_t *en
                                     ? srl_init_weak_hash(enc)       \
                                    : (enc)->weak_seenhash )
 
+#define SRL_GET_WEAK_SEENHASH_OR_NULL(enc) ((enc)->weak_seenhash)
+
 #define SRL_GET_FREEZEOBJ_SVHASH(enc) ( (enc)->freezeobj_svhash == NULL     \
                                         ? srl_init_freezeobj_svhash(enc)    \
                                         : (enc)->freezeobj_svhash )
@@ -390,7 +393,7 @@ SRL_STATIC_INLINE srl_encoder_t *
 srl_empty_encoder_struct(pTHX)
 {
     srl_encoder_t *enc;
-    Newx(enc, 1, srl_encoder_t);
+    Newxz(enc, 1, srl_encoder_t);
     if (enc == NULL)
         croak("Out of memory");
 
@@ -400,25 +403,8 @@ srl_empty_encoder_struct(pTHX)
         croak("Out of memory");
     }
 
-    /* Set the tmp buffer struct's char buffer to NULL so we don't free
-     * something nasty if it's unused. */
-    enc->tmp_buf.start = NULL;
-
     enc->protocol_version = SRL_PROTOCOL_VERSION;
-    enc->recursion_depth = 0;
     enc->max_recursion_depth = DEFAULT_MAX_RECUR_DEPTH;
-    enc->operational_flags = 0;
-    /*enc->flags = 0;*/ /* to be set elsewhere */
-
-    enc->weak_seenhash = NULL;
-    enc->str_seenhash = NULL;
-    enc->ref_seenhash = NULL;
-    enc->snappy_workmem = NULL;
-    enc->string_deduper_hv = NULL;
-
-    enc->freezeobj_svhash = NULL;
-    enc->sereal_string_sv = NULL;
-    enc->scratch_sv = NULL;
 
     return enc;
 }
@@ -548,8 +534,15 @@ srl_build_encoder_struct(pTHX_ HV *opt, sv_with_hash *options)
         my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_SORT_KEYS);
         if ( !val )
             my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL);
-        if ( val && SvTRUE(val) )
+        if ( val && SvTRUE(val) ) {
             SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS);
+            if (SvIV(val) > 1) {
+                SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL);
+                if (SvIV(val) > 2) {
+                    SRL_ENC_SET_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV);
+                }
+            }
+        }
 
         my_hv_fetchs(he, val, opt, SRL_ENC_OPT_IDX_CANONICAL_REFS);
         if ( !val )
@@ -878,8 +871,11 @@ srl_get_frozen_object(pTHX_ srl_encoder_t *enc, SV *src, SV *referent)
 }
 
 /* Outputs a bless header and the class name (as some form of string or COPY).
- * Caller then has to output the actual reference payload. */
-SRL_STATIC_INLINE void
+ * Caller then has to output the actual reference payload.
+ * If it returns 1 it means the classname was written out and should NOT
+ * be overwritten by the ref rewrite logic (which handles REFP).
+ * If it returns 0 it means no classname was output. */
+SRL_STATIC_INLINE int
 srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement)
 {
     /* Check that we actually want to support objects */
@@ -888,11 +884,25 @@ srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement)
                 "using Sereal::Encoder was explicitly disabled using the "
                 "'croak_on_bless' option.");
     } else if (expect_false( SRL_ENC_HAVE_OPTION(enc, SRL_F_NO_BLESS_OBJECTS) )) {
-        return;
+        return 0;
     } else {
         const HV *stash = SvSTASH(referent);
         PTABLE_t *string_seenhash = SRL_GET_STR_PTR_SEENHASH(enc);
-        const ptrdiff_t oldoffset = (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
+        svtype svt= SvTYPE(referent);
+        int is_av_or_hv= (svt == SVt_PVAV || svt== SVt_PVHV);
+        ptrdiff_t oldoffset= is_av_or_hv
+                           ? 0
+                           : (ptrdiff_t)PTABLE_fetch(string_seenhash, referent);
+
+        if (oldoffset) {
+            return 0;
+        } else {
+            svt= replacement ? SvTYPE(replacement) : SvTYPE(referent);
+            if (SRL_UNSUPPORTED_SvTYPE(svt)) {
+                return 0;
+            }
+            oldoffset= (ptrdiff_t)PTABLE_fetch(string_seenhash, (SV *)stash);
+        }
 
         if (oldoffset != 0) {
             /* Issue COPY instead of literal class name string */
@@ -923,7 +933,15 @@ srl_dump_classname(pTHX_ srl_encoder_t *enc, SV *referent, SV *replacement)
             srl_dump_pv(aTHX_ enc, class_name, len, 0);
 #endif
         }
+        if (is_av_or_hv) {
+            return 0;
+        } else {
+            /* use the string_seenhash to track which items we have seen before */
+            PTABLE_store(string_seenhash, (void *)referent, INT2PTR(void *, BODY_POS_OFS(&enc->buf)));
+            return 1;
+        }
     }
+    return 0;
 }
 
 
@@ -1027,25 +1045,30 @@ srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *user_he
 SRL_STATIC_INLINE void
 srl_fixup_weakrefs(pTHX_ srl_encoder_t *enc)
 {
-    PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH(enc);
-    PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash);
-    PTABLE_ENTRY_t *ent;
-
-    /* we now walk the weak_seenhash and set any tags it points
-     * at to the PAD opcode, this basically turns the first weakref
-     * we encountered into a normal ref when there is only a weakref
-     * pointing at the structure. */
-    while ( NULL != (ent = PTABLE_iter_next(it)) ) {
-        const ptrdiff_t offset = (ptrdiff_t)ent->value;
-        if ( offset ) {
-            srl_buffer_char *pos = enc->buf.body_pos + offset;
-            assert(*pos == SRL_HDR_WEAKEN);
-            if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset);
-            *pos = SRL_HDR_PAD;
+    PTABLE_t *weak_seenhash = SRL_GET_WEAK_SEENHASH_OR_NULL(enc);
+    if (!weak_seenhash)
+        return;
+
+    {
+        PTABLE_ITER_t *it = PTABLE_iter_new(weak_seenhash);
+        PTABLE_ENTRY_t *ent;
+
+        /* we now walk the weak_seenhash and set any tags it points
+         * at to the PAD opcode, this basically turns the first weakref
+         * we encountered into a normal ref when there is only a weakref
+         * pointing at the structure. */
+        while ( NULL != (ent = PTABLE_iter_next(it)) ) {
+            const ptrdiff_t offset = (ptrdiff_t)ent->value;
+            if ( offset ) {
+                srl_buffer_char *pos = enc->buf.body_pos + offset;
+                assert(*pos == SRL_HDR_WEAKEN);
+                if (DEBUGHACK) warn("setting byte at offset %"UVuf" to PAD", (UV)offset);
+                *pos = SRL_HDR_PAD;
+            }
         }
-    }
 
-    PTABLE_iter_free(it);
+        PTABLE_iter_free(it);
+    }
 }
 
 
@@ -1141,145 +1164,290 @@ srl_dump_av(pTHX_ srl_encoder_t *enc, AV *src, U32 refcount)
     }
 }
 
-/* compare hash entries, used when all keys are bytestrings */
-static int
-he_cmp_fast(const void *a_, const void *b_)
+SRL_STATIC_INLINE void
+srl_dump_hv_unsorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, UV n)
 {
-    /* even though we are called as a callback from qsort there is
-     * no need for a dTHX here, we don't use anything that needs it */
-    int cmp;
+    HE *he;
+    const int do_share_keys = HvSHAREKEYS((SV *)src);
+    HE **he_ptr= HvARRAY(src);
+    HE **he_end= he_ptr + HvMAX(src) + 1;
 
-    HE *a = *(HE **)a_;
-    HE *b = *(HE **)b_;
+    do {
+        for (he= *he_ptr++; he; he= HeNEXT(he) ) {
+            SV *v= HeVAL(he);
+            if (v != &PL_sv_placeholder) {
+                srl_dump_hk(aTHX_ enc, he, do_share_keys);
+                CALL_SRL_DUMP_SV(enc, v);
+                if (--n == 0) {
+                    he_ptr= he_end;
+                    break;
+                }
+            }
+        }
+    } while ( he_ptr < he_end );
+}
+
+SRL_STATIC_INLINE void
+srl_dump_hv_unsorted_mg(pTHX_ srl_encoder_t *enc, HV *src, const UV n)
+{
+    HE *he;
+    UV i= 0;
+    const int do_share_keys = HvSHAREKEYS((SV *)src);
+
+    (void)hv_iterinit(src); /* return value not reliable according to API docs */
+    while ((he = hv_iternext(src))) {
+        SV *v;
+        if (expect_false( i == n ))
+            croak("Panic: cannot serialize a tied hash which changes its size!");
+        v= hv_iterval(src, he);
+        srl_dump_hk(aTHX_ enc, he, do_share_keys);
+        CALL_SRL_DUMP_SV(enc, v);
+        ++i;
+    }
+    if (expect_false( i != n ))
+        croak("Panic: cannot serialize a tied hash which changes its size!");
+}
+
+/* sorting hashes - nothing in perl is easy. ever.
+ *
+ * Some things to keep in mind about perl hashes as you read this code:
+ *
+ * Hashes may be shared or not. Usually shared. This means they share their
+ * key data via PL_strtab.
+ *
+ * Hashes may be tied or not. Usually not. When tied the keys from the hash
+ * are available only as SV *'s, and when untied, the keys from the hash are
+ * accessed via HE *'s.
+ *
+ * Some HE's actually contains SV's but most contain a ptr/len combo with
+ * an utf8 flag. To make things even more interesting utf8 keys are
+ * normalized to latin1 by perl where possible before being stored in the HE,
+ * with the utf8 flag indicating "was utf8" instead of "is utf8" or "not utf8".
+ *
+ * The complexity about accessing the key for a hash can be managed away by
+ * perl via API's like hv_iterkeysv(), but using that means constructing mortal
+ * SV's for each key as we go.
+ *
+ * We could in theory use the HePV() interface, but one annoying result of the
+ * "was utf8" logic is that we can't use a sort comparator which looks
+ * at the raw binary of the keys when the keys might contain utf8. A utf8 key
+ * like "\xDF" will be downgraded to ascii in the HE form, but will be upgraded
+ * to the utf8 representation in the SV form. So if we want to do "fast" sorting
+ * we have to restrict it to non-utf8/non-sv keys, and force the use of the SV
+ * based API (which we have to use for tie's anyway) when we see a UTF8 key.
+ *
+ * Which is what we do below. In order to sort a hash we need to construct an
+ * array of its contents, in srl_dump_sorted_nomg() we walk the hash, checking
+ * each key, and copying each HE over into a scratch buffer which it then sorts.
+ * If during the transcription process it sees any utf8 or SV keys it exits
+ * immediately, and falls through to srl_dump_sort_mg(), which uses hv_iterkeysv()
+ * to construct an array of HE_SV instead, which we then sort.
+ */
 
-    STRLEN la = HeKLEN (a);
-    STRLEN lb = HeKLEN (b);
 
-    if (!(cmp = memcmp (HeKEY (b), HeKEY (a), lb < la ? lb : la)))
-        cmp = lb - la;
 
-    return cmp;
+SRL_STATIC_INLINE int
+he_islt(const HE *a, const HE *b)
+{
+    /* no need for a dTHX here, we don't use anything that needs it */
+    const STRLEN la = HeKLEN(a);
+    const STRLEN lb = HeKLEN(b);
+    const int cmp = memcmp(HeKEY(a), HeKEY(b), la < lb ? la : lb);
+    if (cmp) {
+        return cmp < 0;
+    } else {
+        return la < lb;
+    }
 }
 
-/* compare hash entries, used when some keys are sv's or utf8 */
-static int
-he_cmp_slow(const void *a, const void *b)
+SRL_STATIC_INLINE int
+he_sv_islt_fast(const HE_SV *a, const HE_SV *b)
 {
-    /* we are called as a callback from qsort, so no pTHX
-     * is possible in our argument signature, so we need to do a
-     * dTHX; here ourselves. */
-    dTHX;
-    return sv_cmp( HeSVKEY_force( *(HE **)b), HeSVKEY_force( *(HE **)a ) );
+    /* no need for a dTHX here, we don't use anything that needs it */
+    char *a_ptr;
+    char *b_ptr;
+    int a_isutf8;
+    int b_isutf8;
+    const STRLEN a_len= a->key.sv ? SvCUR(a->key.sv) : HeKLEN(a->val.he);
+    const STRLEN b_len= b->key.sv ? SvCUR(b->key.sv) : HeKLEN(b->val.he);
+    if (a_len != b_len) {
+        return a_len < b_len;
+    }
+    a_isutf8= (a->key.sv ? SvUTF8(a->key.sv) : HeKUTF8(a->val.he)) ? 0 : 1;
+    b_isutf8= (b->key.sv ? SvUTF8(b->key.sv) : HeKUTF8(b->val.he)) ? 0 : 1;
+    if (a_isutf8 != b_isutf8) {
+        return a_isutf8 < b_isutf8;
+    }
+    a_ptr= a->key.sv ? SvPVX(a->key.sv) : HeKEY(a->val.he);
+    b_ptr= b->key.sv ? SvPVX(b->key.sv) : HeKEY(b->val.he);
+    return memcmp(a_ptr, b_ptr, a_len < b_len ? a_len : b_len ) < 0;
 }
 
+#define ISLT_HE_SV(a,b)    he_sv_islt_fast( a, b )
+#define ISLT_SV_CMP(a,b)   sv_cmp(a->key.sv, b->key.sv) == sort_dir
+
 
 SRL_STATIC_INLINE void
-srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount)
+srl_qsort(pTHX_ srl_encoder_t *enc, const UV n, HE_SV *array)
+{
+    if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) {
+        int sort_dir= SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL_REV) ? 1 : -1;
+        /* hack to forcefully disable "use bytes" */
+        COP cop= *PL_curcop;
+        cop.op_private= 0;
+
+        ENTER;
+        SAVETMPS;
+
+        SAVEVPTR (PL_curcop);
+        PL_curcop= &cop;
+       
+        /* now sort */
+        QSORT(HE_SV, array, n, ISLT_SV_CMP);
+
+        FREETMPS;
+        LEAVE;
+    } else {
+        /* now sort */
+        QSORT(HE_SV, array, n, ISLT_HE_SV);
+    }
+}
+
+
+SRL_STATIC_INLINE void
+srl_dump_hv_sorted_sv_slow(pTHX_ srl_encoder_t *enc, HV *src, const UV n, HE_SV *array)
 {
     HE *he;
+    UV i= 0;
     const int do_share_keys = HvSHAREKEYS((SV *)src);
-    UV n;
+    const int is_tie= !array;
+
+    /* This sub is used for ties, and for hashes with SV keys in them,
+     * and when the user requests SORT_KEYS_PERL, it is the slowest way
+     * and most memory hungry way to serialize a hash. We will use the 
+     * full perl api for extracting the contents of the hash, which fortifies
+     * us against ties, and we will convert all keys into mortal
+     * sv's where necessary. This means we can use sv_cmp on the keys
+     * if we wish.
+     */
 
-    if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) {
-        UV i;
-        /* for tied hashes, we have to iterate to find the number of entries. Alas... */
-        (void)hv_iterinit(src); /* return value not reliable according to API docs */
-        n = 0;
-        while ((he = hv_iternext(src))) { ++n; }
+    (void)hv_iterinit(src); /* return value not reliable according to API docs */
+    {
+        HE_SV *array_end;
+        if (!array) {
+            Newx(array, n, HE_SV);
+            SAVEFREEPV(array);
+        }
+        array_end= array + n;
+        while ((he = hv_iternext(src))) {
+            if (expect_false( i == n ))
+                croak("Panic: cannot serialize a %s hash which changes its size!",is_tie ? "tied" : "untied");
+            array[i].key.sv= hv_iterkeysv(he);
+            array[i].val.sv= hv_iterval(src,he);
+            i++;
+        }
+        if (expect_false( i != n ))
+            croak("Panic: can not serialize a %s hash which changes it size!", is_tie ? "tied" : "untied");
 
-        BUF_SIZE_ASSERT_HV(&enc->buf, n);
+        srl_qsort(aTHX_ enc, n, array);
 
-        if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
-            enc->buf.pos--; /* back up over the previous REFN */
-            srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
-        } else {
-            srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+        while ( array < array_end ) {
+            CALL_SRL_DUMP_SV(enc, array->key.sv);
+            CALL_SRL_DUMP_SV(enc, array->val.sv);
+            array++;
         }
+    }
+}
 
-        (void)hv_iterinit(src); /* return value not reliable according to API docs */
-        i = 0;
-        if (SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS)) {
-            HE **he_array;
-            int fast = 1;
-            Newxz(he_array, n, HE*);
-            SAVEFREEPV(he_array);
-            while ((he = hv_iternext(src))) {
-                if (expect_false( i == n ))
-                    croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
-                he_array[i++]= he;
-                if (HeKLEN (he) < 0 || HeKUTF8 (he))
-                    fast = 0;
-            }
-            if (expect_false( i != n ))
-                croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
-            if (fast) {
-                qsort(he_array, n, sizeof (HE *), he_cmp_fast);
-            } else {
-                /* hack to forcefully disable "use bytes" */
-                COP cop= *PL_curcop;
-                cop.op_private= 0;
-
-                ENTER;
-                SAVETMPS;
 
-                SAVEVPTR (PL_curcop);
-                PL_curcop= &cop;
+SRL_STATIC_INLINE void
+srl_dump_hv_sorted_nomg(pTHX_ srl_encoder_t *enc, HV *src, const UV n)
+{
+    HE *he;
+    const int do_share_keys = HvSHAREKEYS((SV *)src);
 
-                qsort(he_array, n, sizeof (HE *), he_cmp_slow);
+    /* This sub is used only for untied hashes and when the user wants
+     * sorted keys, but not necessarily the order that perl would use. 
+     */
 
-                FREETMPS;
-                LEAVE;
-            }
-            for ( i= 0; i < n ; i++ ) {
-                SV *v;
-                he= he_array[i];
-                v= hv_iterval(src, he);
-                srl_dump_hk(aTHX_ enc, he, do_share_keys);
-                CALL_SRL_DUMP_SV(enc, v);
-            }
-        } else {
-            while ((he = hv_iternext(src))) {
-                SV *v;
-                if (expect_false( i == n ))
-                    croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
-                v= hv_iterval(src, he);
-                srl_dump_hk(aTHX_ enc, he, do_share_keys);
-                CALL_SRL_DUMP_SV(enc, v);
-                ++i;
+    (void)hv_iterinit(src); /* return value not reliable according to API docs */
+    {
+        HE_SV *array;
+        HE_SV *array_ptr;
+        HE_SV *array_end;
+        Newx(array, n, HE_SV);
+        SAVEFREEPV(array);
+        array_ptr = array;
+        while ((he = hv_iternext(src))) {
+            if ( HeKWASUTF8(he) ) {
+                array_ptr->key.sv= hv_iterkeysv(he);
+            } else {
+                array_ptr->key.sv = HeSVKEY(he);
             }
-            if (expect_false( i != n ))
-                croak("Panic: Trying to dump a tied hash that has a different number of keys in each iteration is just daft. Do not do that.");
+            array_ptr->val.he = he;
+            array_ptr++;
         }
-    } else {
+        
+        srl_qsort(aTHX_ enc, n, array);
+
+        array_end = array + n;
+        for ( array_end= array + n; array < array_end; array++ ) {
+            SV *v;
+            he = array->val.he;
+            v = hv_iterval(src, he);
+            srl_dump_hk(aTHX_ enc, he, do_share_keys);
+            CALL_SRL_DUMP_SV(enc, v);
+        }
+    }
+}
+
+SRL_STATIC_INLINE void
+srl_dump_hv(pTHX_ srl_encoder_t *enc, HV *src, U32 refcount)
+{
+    HE *he;
+    UV n;
+    if ( SvMAGICAL(src) ) {
+        /* for tied hashes, we have to iterate to find the number of entries. Alas... */
+        n= 0;
+        (void)hv_iterinit(src); /* return value not reliable according to API docs */
+        while ((he = hv_iternext(src))) { ++n; }
+    }
+    else {
         n= HvUSEDKEYS(src);
-        BUF_SIZE_ASSERT_HV(&enc->buf, n);
-        if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
-            enc->buf.pos--; /* backup over the previous REFN */
-            srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
-        } else {
-            srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+    }
+
+    BUF_SIZE_ASSERT_HV(&enc->buf, n);
+    if (n < 16 && refcount == 1 && !SRL_ENC_HAVE_OPTION(enc,SRL_F_CANONICAL_REFS)) {
+        enc->buf.pos--; /* backup over the previous REFN */
+        srl_buf_cat_char_nocheck(&enc->buf, SRL_HDR_HASHREF + n);
+    } else {
+        srl_buf_cat_varint_nocheck(aTHX_ &enc->buf, SRL_HDR_HASH, n);
+    }
+
+    if ( n ) {
+        if ( SvMAGICAL(src) || SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS_PERL) ) {
+            /* SORT_KEYS_PERL implies SORT_KEYS, but we check for either just to be
+             * careful - yves*/
+            if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS|SRL_F_SORT_KEYS_PERL) ) {
+                srl_dump_hv_sorted_sv_slow(aTHX_ enc, src, n, NULL);
+            }
+            else {
+                srl_dump_hv_unsorted_mg(aTHX_ enc, src, n);
+            }
         }
-        if (n) {
-            HE **he_ptr= HvARRAY(src);
-            HE **he_end= he_ptr + HvMAX(src) + 1;
-            do {
-                for (he= *he_ptr++; he; he= HeNEXT(he) ) {
-                    SV *v= HeVAL(he);
-                    if (v != &PL_sv_placeholder) {
-                        srl_dump_hk(aTHX_ enc, he, do_share_keys);
-                        CALL_SRL_DUMP_SV(enc, v);
-                        if (--n == 0) {
-                            he_ptr= he_end;
-                            break;
-                        }
-                    }
-                }
-            } while ( he_ptr < he_end );
+        else {
+            if ( SRL_ENC_HAVE_OPTION(enc, SRL_F_SORT_KEYS) ) {
+                srl_dump_hv_sorted_nomg(aTHX_ enc, src, n);
+            }
+            else {
+                srl_dump_hv_unsorted_nomg(aTHX_ enc, src, n);
+            }
         }
     }
 }
 
 
+
 SRL_STATIC_INLINE void
 srl_dump_hk(pTHX_ srl_encoder_t *enc, HE *src, const int share_keys)
 {
@@ -1527,7 +1695,7 @@ redo_dump:
 #if defined(MODERN_REGEXP) && defined(REGEXP_NO_LONGER_POK)
     /* Only need to enter here if we have rather modern regexps AND they're
      * NO LONGER POK (5.17.6 and up). */
-    if (expect_false( svt == SVt_REGEXP ) ) {
+    if ( expect_false( svt == SVt_REGEXP ) ) {
         srl_dump_regexp(aTHX_ enc, src);
     }
     else
@@ -1550,10 +1718,13 @@ redo_dump:
 
         ref_rewrite_pos= BODY_POS_OFS(&enc->buf);
 
-        if (expect_false( sv_isobject(src) )) {
+        if ( expect_false( sv_isobject(src) ) ) {
             /* Write bless operator with class name */
             replacement= srl_get_frozen_object(aTHX_ enc, src, referent);
-            srl_dump_classname(aTHX_ enc, referent, replacement); /* 1 == have freeze call */
+            if (srl_dump_classname(aTHX_ enc, referent, replacement)) {
+                /* 1 means we should not rewrite away the classname */
+                ref_rewrite_pos= BODY_POS_OFS(&enc->buf);
+            }
         }
 
         srl_buf_cat_char(&enc->buf, SRL_HDR_REFN);
@@ -1583,11 +1754,12 @@ redo_dump:
         srl_dump_av(aTHX_ enc, (AV *)src, refcount);
     }
     else
-    if (!SvOK(src)) { /* undef and weird shit */
-        if ( svt > SVt_PVMG ) {  /* we exclude magic, because magic sv's can be undef too */
+    if ( ! SvOK(src) ) { /* undef and weird shit */
+        if ( SRL_UNSUPPORTED_SvTYPE(svt) ) {
+            /* we exclude magic, because magic sv's can be undef too */
             /* called when we find an unsupported type/reference. May either throw exception
              * or write ONE (nested or single) item to the buffer. */
-#define SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos)                     \
+#define SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos)                     \
             STMT_START {                                                                       \
                 if ( SRL_ENC_HAVE_OPTION((enc), SRL_F_UNDEF_UNKNOWN) ) {                       \
                     if (SRL_ENC_HAVE_OPTION((enc), SRL_F_WARN_UNKNOWN))                        \
@@ -1638,7 +1810,7 @@ redo_dump:
                           "by the Sereal encoding format", (svt), sv_reftype((src),0),(src));  \
                 }                                                                              \
             } STMT_END
-            SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos);
+            SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos);
         }
         else if (src == &PL_sv_undef && enc->protocol_version >= 3 ) {
             srl_buf_cat_char(&enc->buf, SRL_HDR_CANONICAL_UNDEF);
@@ -1647,8 +1819,8 @@ redo_dump:
         }
     }
     else {
-        SRL_HANDLE_UNSUPPORTED_TYPE(enc, src, svt, refsv, ref_rewrite_pos);
-#undef SRL_HANDLE_UNSUPPORTED_TYPE
+        SRL_HANDLE_UNSUPPORTED_SvTYPE(enc, src, svt, refsv, ref_rewrite_pos);
+#undef SRL_HANDLE_UNSUPPORTED_SvTYPE
     }
     --enc->recursion_depth;
 }
diff --git a/srl_encoder.h b/srl_encoder.h
index f629410..639f220 100644
--- a/srl_encoder.h
+++ b/srl_encoder.h
@@ -27,7 +27,11 @@ typedef struct {
     UV recursion_depth;       /* current Perl-ref recursion depth */
     ptable_ptr ref_seenhash;  /* ptr table for avoiding circular refs */
     ptable_ptr weak_seenhash; /* ptr table for avoiding dangling weakrefs */
-    ptable_ptr str_seenhash;  /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys) */
+    ptable_ptr str_seenhash;  /* ptr table for issuing COPY commands based on PTRS (used for classnames and keys)
+                               * for now this is also coopted to track which objects we have dumped as objects,
+                               * and to ensure we only output a given object once.
+                               * Possibly this should be replaced with freezeobj_svhash, but this works fine.
+                               */
     ptable_ptr freezeobj_svhash; /* ptr table for tracking objects and their frozen replacments via FREEZE */
     HV *string_deduper_hv;    /* track strings we have seen before, by content */
 
@@ -45,6 +49,16 @@ typedef struct {
     U32 hash;
 } sv_with_hash;
 
+typedef struct {
+    union {
+        SV *sv;
+    } key;
+    union {
+        HE *he;
+        SV *sv;
+    } val;
+} HE_SV;
+
 /* constructor from options */
 srl_encoder_t *srl_build_encoder_struct(pTHX_ HV *opt, sv_with_hash *options);
 
@@ -84,10 +98,12 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use
  * set since we otherwise croak.  Corresponds to the 'warn_unknown' option. */
 #define SRL_F_WARN_UNKNOWN                      0x00020UL
 
-/* WARNING: SRL_F_COMPRESS_SNAPPY               0x00040UL
- *          SRL_F_COMPRESS_SNAPPY_INCREMENTAL   0x00080UL
- *          SRL_F_COMPRESS_ZLIB                 0x00100UL
- *          are moved to srl_compress.h */
+/* WARNING:
+ * #define SRL_F_COMPRESS_SNAPPY               0x00040UL
+ * #define SRL_F_COMPRESS_SNAPPY_INCREMENTAL   0x00080UL
+ * #define SRL_F_COMPRESS_ZLIB                 0x00100UL
+ * are moved to srl_compress.h
+ */
 
 /* Only meaningful if SRL_F_WARN_UNKNOWN also set. If this one is set, then we don't warn
  * if the unsupported item has string overloading. */
@@ -116,13 +132,15 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use
 /* if set in flags, then do not use ARRAYREF or HASHREF ever */
 #define SRL_F_CANONICAL_REFS                    0x08000UL
 
+#define SRL_F_SORT_KEYS_PERL                    0x10000UL
+#define SRL_F_SORT_KEYS_PERL_REV                0x20000UL
 /* ====================================================================
  * oper flags
  */
 /* Set while the encoder is in active use / dirty */
 #define SRL_OF_ENCODER_DIRTY                 1UL
 
-#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & flag_num)
+#define SRL_ENC_HAVE_OPTION(enc, flag_num) ((enc)->flags & (flag_num))
 #define SRL_ENC_SET_OPTION(enc, flag_num) STMT_START {(enc)->flags |= (flag_num);}STMT_END
 #define SRL_ENC_RESET_OPTION(enc, flag_num) STMT_START {(enc)->flags &= ~(flag_num);}STMT_END
 
@@ -133,6 +151,14 @@ SV *srl_dump_data_structure_mortal_sv(pTHX_ srl_encoder_t *enc, SV *src, SV *use
 #define SRL_ENC_SV_COPY_ALWAYS 0x00000000UL
 #define SRL_ENC_SV_REUSE_MAYBE 0x00000001UL
 
+#define SRL_UNSUPPORTED_SvTYPE(svt) (   \
+    /* svt == SVt_INVLIST || */         \
+    svt == SVt_PVGV ||                  \
+    svt == SVt_PVCV ||                  \
+    svt == SVt_PVFM ||                  \
+    svt == SVt_PVIO ||                  \
+    0 )
+
 /* by default we do not allow people to build with support for SRL_HDR_LONG_DOUBLE */
 #if defined(SRL_ALLOW_LONG_DOUBLE) && defined(USE_LONG_DOUBLE) && defined(HAS_LONG_DOUBLE)
 #define SRL_DO_LONG_DOUBLE      1
diff --git a/srl_reader_error.h b/srl_reader_error.h
index f83f5c3..f478fe7 100644
--- a/srl_reader_error.h
+++ b/srl_reader_error.h
@@ -22,11 +22,13 @@
 #define SRL_RDR_ERROR_PANIC(buf, msg)                        SRL_RDR_ERRORf1((buf), "Panic: %s", msg);
 
 /* trace functions */
+#ifdef WANT_SRL_RDR_TRACE
 #ifdef TRACE_READER
 #   define SRL_RDR_TRACE(msg, args...) \
         fprintf(stderr, "%s:%d:%s(): "msg"\n", __FILE__, __LINE__, __func__, ## args)
 #else
 #   define SRL_RDR_TRACE(msg, args...)
 #endif
+#endif
 
 #endif
diff --git a/t/005_flags.t b/t/005_flags.t
index 7c78740..f89676e 100644
--- a/t/005_flags.t
+++ b/t/005_flags.t
@@ -1,7 +1,6 @@
 use strict;
 use warnings;
 
-use Sereal::Decoder;
 use Test::More;
 use File::Spec;
 use lib File::Spec->catdir(qw(t lib));
@@ -18,7 +17,7 @@ if (have_encoder_and_decoder(3.005003)) {
     run_tests( "aliased_dedupe_strings", { aliased_dedupe_strings => 1 } );
     done_testing();
 } else {
-    plan skip_all => 'Did not find right version of encoder';
+    plan skip_all => "Did not find right version of encoder/decoder";
 }
 
 sub run_tests {
@@ -69,6 +68,7 @@ sub run_tests {
 
         TODO: {
             # we must do this test before we test numeric equivalence
+            no warnings 'numeric';
             my $have= ($decoded ^ '1');
             my $want= ($tests{$test} ^ '1');
             local $TODO = $have ne $want ? "Cannot reliably round trip NIOK flag(s)" : undef;
diff --git a/t/030_canonical_vs_test_deep.t b/t/030_canonical_vs_test_deep.t
index adc3bb8..1b3705e 100644
--- a/t/030_canonical_vs_test_deep.t
+++ b/t/030_canonical_vs_test_deep.t
@@ -33,7 +33,7 @@ eval {
         unless version->new(Test::Deep->VERSION) >= version->new('0.110');
     1;
 } or do {
-    my $error = $@ // "Zombie Error";
+    my $error = $@ || "Zombie Error";
     plan skip_all => "We are skipping all our tests because we don't have a suitable Test::Deep here, got error: $error";
 };
 plan tests => keys(%tests) * 2;
diff --git a/t/040_tied_hash.t b/t/040_tied_hash.t
new file mode 100644
index 0000000..6549ed8
--- /dev/null
+++ b/t/040_tied_hash.t
@@ -0,0 +1,71 @@
+use warnings;
+use strict;
+
+package NewStdHash;
+require Tie::Hash;
+our @ISA = qw(Tie::StdHash);
+
+package main;
+use Test::More;
+use File::Spec;
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+    lib->import('lib')
+        if !-d 't';
+}
+use Sereal::TestSet qw(:all);
+my $u_df= "\xDFu";
+utf8::upgrade($u_df);
+my @keys= (
+    'foo', 'bar', 'mip', 'xap', 'food', 'fool', 'fools', 'barking', 'bark',
+    $u_df,
+    "\x{df}a",
+    "\x{c3}",
+    "\x{de}",
+    "\x{e0}",
+    "\x{100}",
+    "\x{123}",
+    "\x{c4}\x{80}",
+);
+my $have_decoder= have_encoder_and_decoder();
+if ($have_decoder) {
+    plan tests => 1 + (4 * @keys);
+} else {
+    plan tests => 1;
+}
+
+my $enc = Sereal::Encoder->new({
+    sort_keys => 1,
+});
+
+tie my %new_std_hash, 'NewStdHash';
+my %normal_hash;
+foreach my $i (0..$#keys) {
+    $new_std_hash{$keys[$i]} = $i;
+    $normal_hash{$keys[$i]}= $i;
+}
+
+my $enc_tied  = $enc->encode(\%new_std_hash);
+my $enc_normal= $enc->encode(\%normal_hash);
+
+
+is($enc_tied, $enc_normal, "Tied and untied are the same")
+or do {
+    diag "Normal:\n";
+    hobodecode $enc_normal;
+    diag "Tied: \n";
+    hobodecode $enc_tied;
+};
+
+if ($have_decoder) {
+    my $dec= Sereal::Decoder->new();
+    my $dec_tied= $dec->decode($enc_tied);
+    my $dec_normal= $dec->decode($enc_normal);
+    foreach my $i (0..$#keys) {
+        is($dec_tied->{$keys[$i]},$i, "decoded tied");
+        is($dec_normal->{$keys[$i]},$i, "decoded normal");
+        is($new_std_hash{$keys[$i]},$i, "original tied");
+        is($normal_hash{$keys[$i]},$i, "original normal");
+    }
+}
+
diff --git a/t/400_evil.t b/t/400_evil.t
index 68415a3..5fc2f76 100644
--- a/t/400_evil.t
+++ b/t/400_evil.t
@@ -1,7 +1,6 @@
 #!perl
 use strict;
 use warnings;
-use Sereal::Encoder qw(:all);
 use Data::Dumper;
 use File::Spec;
 use Scalar::Util qw(blessed);
@@ -23,6 +22,7 @@ if (not have_encoder_and_decoder()) {
     exit 0;
 }
 
+Sereal::Encoder->import(":all");
 Sereal::Decoder->import(":all");
 
 # First, test tied hashes. Expected behaviour: We don't segfault, we don't
diff --git a/t/700_roundtrip/v1/plain_canon.t b/t/700_roundtrip/v1/plain_canon.t
index f16265e..981eb38 100644
--- a/t/700_roundtrip/v1/plain_canon.t
+++ b/t/700_roundtrip/v1/plain_canon.t
@@ -19,7 +19,7 @@ if (not $ok) {
     plan skip_all => 'Did not find right version of encoder (want 3.001006)';
 }
 else {
-    run_roundtrip_tests("plain_canon", { canonical => 1 });
+    run_roundtrip_tests("plain_canonical", { canonical => 1 });
 }
 
 pass();
diff --git a/t/700_roundtrip/v2/plain_canon.t b/t/700_roundtrip/v2/plain_canon.t
index f16265e..981eb38 100644
--- a/t/700_roundtrip/v2/plain_canon.t
+++ b/t/700_roundtrip/v2/plain_canon.t
@@ -19,7 +19,7 @@ if (not $ok) {
     plan skip_all => 'Did not find right version of encoder (want 3.001006)';
 }
 else {
-    run_roundtrip_tests("plain_canon", { canonical => 1 });
+    run_roundtrip_tests("plain_canonical", { canonical => 1 });
 }
 
 pass();
diff --git a/t/700_roundtrip/v3/plain_canon.t b/t/700_roundtrip/v3/plain_canon.t
index f16265e..981eb38 100644
--- a/t/700_roundtrip/v3/plain_canon.t
+++ b/t/700_roundtrip/v3/plain_canon.t
@@ -19,7 +19,7 @@ if (not $ok) {
     plan skip_all => 'Did not find right version of encoder (want 3.001006)';
 }
 else {
-    run_roundtrip_tests("plain_canon", { canonical => 1 });
+    run_roundtrip_tests("plain_canonical", { canonical => 1 });
 }
 
 pass();
diff --git a/t/700_roundtrip/v3/sort_keys_perl.t b/t/700_roundtrip/v3/sort_keys_perl.t
new file mode 100644
index 0000000..65f327a
--- /dev/null
+++ b/t/700_roundtrip/v3/sort_keys_perl.t
@@ -0,0 +1,29 @@
+#!perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Spec;
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+    lib->import('lib')
+        if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+    plan skip_all => 'Did not find right version of encoder';
+}
+else {
+    run_roundtrip_tests(
+        'sort_keys',      { sort_keys        => 2 }
+    );
+}
+
+
+pass();
+done_testing();
+
diff --git a/t/700_roundtrip/v3/sort_keys_perl_rev.t b/t/700_roundtrip/v3/sort_keys_perl_rev.t
new file mode 100644
index 0000000..c874eed
--- /dev/null
+++ b/t/700_roundtrip/v3/sort_keys_perl_rev.t
@@ -0,0 +1,29 @@
+#!perl
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Spec;
+
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+    lib->import('lib')
+        if !-d 't';
+}
+
+use Sereal::TestSet qw(:all);
+use Test::More;
+
+my $ok = have_encoder_and_decoder();
+if (not $ok) {
+    plan skip_all => 'Did not find right version of encoder';
+}
+else {
+    run_roundtrip_tests(
+        'sort_keys',      { sort_keys        => 3 }
+    );
+}
+
+
+pass();
+done_testing();
+
diff --git a/t/800_threads.t b/t/800_threads.t
index 089a304..42e8088 100644
--- a/t/800_threads.t
+++ b/t/800_threads.t
@@ -14,17 +14,27 @@ BEGIN {
     exit(0);
   }
 }
+use File::Spec;
+use lib File::Spec->catdir(qw(t lib));
+BEGIN {
+    lib->import('lib')
+        if !-d 't';
+}
+use Sereal::TestSet qw(:all);
 
 use Sereal::Encoder;
 plan tests => 1;
 use threads;
+use threads::shared;
 
 sub foo {}
 SCOPE: {
-  my $enc = Sereal::Encoder->new;
+    my $dat= shared_clone([undef]);
+    my $enc = Sereal::Encoder->new;
 
-  my $thr = threads->new(\&foo);
-  $thr->join;
+    my $thr = threads->new(\&foo);
+    $thr->join;
+    my $encoded= $enc->encode($dat);
 }
 
 pass();
diff --git a/t/lib/Sereal/TestSet.pm b/t/lib/Sereal/TestSet.pm
index 01fc12c..9a650bb 100644
--- a/t/lib/Sereal/TestSet.pm
+++ b/t/lib/Sereal/TestSet.pm
@@ -15,11 +15,38 @@ use Scalar::Util qw(reftype blessed refaddr);
 use Config;
 use Carp qw(confess);
 use Storable qw(dclone);
+use Cwd;
 
 # Dynamically load constants from whatever is being tested
-our ($Class, $ConstClass);
+our ($Class, $ConstClass, $InRepo);
+sub get_git_top_dir {
+    my @dirs = (0, 1, 2, 4);
+    for my $d (@dirs) {
+        my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d);
+        my $gdir = File::Spec->catdir($tdir, '.git');
+        return $tdir
+            if -d $gdir;
+    }
+    return();
+}
+
+BEGIN{
+    if (defined(my $top_dir = get_git_top_dir())) {
+        for my $need ('Encoder', 'Decoder') {
+            my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib");
+            if (-d $blib_dir) {
+                require blib;
+                blib->import($blib_dir);
+            }
+        }
+        $InRepo=1;
+    }
+}
 BEGIN {
-    if (-e "lib/Sereal/Encoder.pm") {
+    if (-e "lib/Sereal.pm") {
+        $Class = 'Sereal::Encoder';
+    }
+    elsif (-e "lib/Sereal/Encoder.pm") {
         $Class = 'Sereal::Encoder';
     }
     elsif (-e "lib/Sereal/Decoder.pm") {
@@ -31,7 +58,7 @@ BEGIN {
     elsif (-e "lib/Sereal/Splitter.pm") {
         $Class = 'Sereal::Splitter';
     } else {
-        die "Could not find an applicable Sereal constants location";
+        die "Could not find an applicable Sereal constants location (in: ",cwd(),")";
     }
     $ConstClass = $Class . "::Constants";
     eval "use $ConstClass ':all'; 1"
@@ -601,16 +628,6 @@ sub setup_tests {
 }
 
 
-sub get_git_top_dir {
-    my @dirs = (0, 1, 2);
-    for my $d (@dirs) {
-        my $tdir = File::Spec->catdir(map File::Spec->updir, 1..$d);
-        my $gdir = File::Spec->catdir($tdir, '.git');
-        return $tdir
-            if -d $gdir;
-    }
-    return();
-}
 
 sub have_encoder_and_decoder {
     my ($min_v)= @_;
@@ -618,13 +635,6 @@ sub have_encoder_and_decoder {
     my $need = $Class =~ /Encoder/ ? "Decoder" : "Encoder";
     my $need_class = "Sereal::$need";
 
-    if (defined(my $top_dir = get_git_top_dir())) {
-        my $blib_dir = File::Spec->catdir($top_dir, 'Perl', $need, "blib");
-        if (-d $blib_dir) {
-            require blib;
-            blib->import($blib_dir);
-        }
-    }
     eval "use $Class; 1"
     or do {
         note("Could not locate $Class for testing" . ($@ ? " (Exception: $@)" : ""));
@@ -638,7 +648,7 @@ sub have_encoder_and_decoder {
     };
     my $cmp_v = $need_class->VERSION;
     if ($min_v and $cmp_v < $min_v) {
-        note("Could not load correct version of $need_class for testing "
+        diag("Could not load correct version of $need_class for testing "
              ."(got: $cmp_v, needed at least $min_v)");
         return;
     }
@@ -646,7 +656,7 @@ sub have_encoder_and_decoder {
     $cmp_v = sprintf("%.2f", int($cmp_v*100)/100);
     my %compat_versions = map {$_ => 1} $Class->_test_compat();
     if (not defined $cmp_v or not exists $compat_versions{$cmp_v}) {
-        note("Could not load correct version of $need_class for testing "
+        diag("Could not load correct version of $need_class for testing "
              ."(got: $cmp_v, needed any of ".join(", ", keys %compat_versions).")");
         return();
     }
@@ -681,6 +691,10 @@ my $eng2= "1e3";
 
 my $sum= $eng0e0 + $eng0e1 + $eng2;
 
+sub encoder_required {
+    my ($ver, $name)= @_;
+    return "" . ( $Sereal::Encoder::VERSION < $ver ? "TODO " : "") . $name;
+}
 
 sub _get_roundtrip_tests {
     my @ScalarRoundtripTests = (
@@ -734,8 +748,8 @@ sub _get_roundtrip_tests {
             1023,1024,1025,
             8191,8192,8193,
         )),
-        ( map { [ ( $Sereal::Encoder::VERSION < 3.005002 ? "TODO" : "")
-                    . " troublesome num/strs '$_'", $_ ] } @numstr ),
+        ( map { [ encoder_required(3.005002, " troublesome num/strs '$_'"),
+                  $_ ] } @numstr ),
         ["long latin1 string", "Ã¼ll" x 10000],
         ["long utf8 string", do {use utf8; " ×¢×××× ××©×" x 10000}],
         ["long utf8 string with only ascii", do {use utf8; "foo" x 10000}],
@@ -810,8 +824,27 @@ sub _get_roundtrip_tests {
         [ "undef", [\undef, \undef] ],
     );
 
+    my @blessed_array_check1;
+    $blessed_array_check1[0]= "foo";
+    $blessed_array_check1[1]= bless \$blessed_array_check1[0], "BlessedArrayCheck";
+    $blessed_array_check1[2]= \$blessed_array_check1[0];
+
+    my @blessed_array_check2= (3,0,0,3);
+    $blessed_array_check2[1]= \$blessed_array_check2[0];
+    $blessed_array_check2[2]= \$blessed_array_check2[3];
+    bless \$blessed_array_check2[0], "BlessedArrayCheck";
+    bless \$blessed_array_check2[3], "BlessedArrayCheck";
+
+    my @sc_array=(1,1);
+    $sc_array[0]=bless \$sc_array[1], "BlessedArrayLeft";
+    $sc_array[1]=bless \$sc_array[0], "BlessedArrayRight";
+
+
     my @RoundtripTests = (
         @ScalarRoundtripTests,
+        [ encoder_required(3.006006,"BlessedArrayCheck 1"), \@blessed_array_check1 ],
+        [ encoder_required(3.006006,"BlessedArrayCheck 2"), \@blessed_array_check2 ],
+        [ encoder_required(3.006006,"Scalar Cross Blessed Array"), \@sc_array ],
 
         ["[{foo => 1}, {foo => 2}] - repeated hash keys",
           [{foo => 1}, {foo => 2}] ],
@@ -1099,7 +1132,8 @@ sub run_roundtrip_tests_internal {
             eval {$decoded2 = $dec->($encoded2); 1}
                 or do {
                     my $err = $@ || 'Zombie error';
-                    diag("Got error while encoding the second time: $err");
+                    diag("Got error while decoding the second time: $err");
+                    # hobodecode($encoded2);
                 };
 
             defined($decoded2) == defined($data)
@@ -1127,7 +1161,7 @@ sub run_roundtrip_tests_internal {
             eval {$decoded3 = $dec->($encoded3); 1}
                 or do {
                     my $err = $@ || 'Zombie error';
-                    diag("Got error while encoding the third time: $err");
+                    diag("Got error while decoding the third time: $err");
                 };
 
             defined($decoded3) == defined($data)