Compare the hash of files with identical mtime/size #5589
authorChristian Kamm <mail@ckamm.de>
Wed, 14 Jun 2017 10:14:46 +0000 (12:14 +0200)
committerChristian Kamm <mail@ckamm.de>
Thu, 15 Jun 2017 11:54:16 +0000 (13:54 +0200)
* For conflicts where mtime and size are identical:

  a) If there's no remote checksum, skip (unchanged)
  b) If there's a remote checksum that's a useful hash, create a
     PropagateDownload job and compute the local hash. If the hashes
     are identical, don't download the file and just update metadata.

* Avoid exposing the existence of checksumTypeId beyond the database
  layer. This makes handling checksums easier in general because they
  can usually be treated as a single blob.

  This change was prompted by the difficulty of producing file_stat_t
  entries uniformly from PROPFINDs and the database.

25 files changed:
csync/src/csync.c
csync/src/csync.h
csync/src/csync_private.h
csync/src/csync_reconcile.c
csync/src/csync_statedb.c
csync/src/csync_update.c
csync/src/vio/csync_vio_file_stat.c
src/libsync/checksums.cpp
src/libsync/checksums.h
src/libsync/discoveryphase.cpp
src/libsync/propagatedownload.cpp
src/libsync/propagatedownload.h
src/libsync/propagateremotemove.cpp
src/libsync/propagateupload.cpp
src/libsync/propagateupload.h
src/libsync/propagateuploadng.cpp
src/libsync/propagateuploadv1.cpp
src/libsync/propagatorjobs.cpp
src/libsync/syncengine.cpp
src/libsync/syncfileitem.h
src/libsync/syncjournaldb.cpp
src/libsync/syncjournalfilerecord.cpp
src/libsync/syncjournalfilerecord.h
test/testsyncengine.cpp
test/testsyncjournaldb.cpp

index abac02efeebade6957a16bb7a503f06913549a15..9af6f0fcf8762af0c6d30408a11621b195f46b61 100644 (file)
@@ -376,8 +376,7 @@ static int _csync_treewalk_visitor(void *obj, void *data) {
 
       trav.error_status = cur->error_status;
       trav.has_ignored_files = cur->has_ignored_files;
-      trav.checksum = cur->checksum;
-      trav.checksumTypeId = cur->checksumTypeId;
+      trav.checksumHeader = cur->checksumHeader;
 
       if( other_node ) {
           csync_file_stat_t *other_stat = (csync_file_stat_t*)other_node->data;
@@ -670,7 +669,7 @@ void csync_file_stat_free(csync_file_stat_t *st)
     SAFE_FREE(st->directDownloadCookies);
     SAFE_FREE(st->etag);
     SAFE_FREE(st->destpath);
-    SAFE_FREE(st->checksum);
+    SAFE_FREE(st->checksumHeader);
     SAFE_FREE(st);
   }
 }
index c8664a3784545f94101db8aec33cc1e726d502eb..949c9efd237e6a80fe4ca710802cdffc035d1b06 100644 (file)
@@ -223,6 +223,10 @@ struct csync_vio_file_stat_s {
   enum csync_vio_file_flags_e flags;
 
   char *original_name; // only set if locale conversion fails
+
+  // For remote file stats: the highest quality checksum the server provided
+  // in the "SHA1:324315da2143" form.
+  char *checksumHeader;
 };
 
 csync_vio_file_stat_t OCSYNC_EXPORT *csync_vio_file_stat_new(void);
@@ -262,8 +266,7 @@ struct csync_tree_walk_file_s {
     char *directDownloadUrl;
     char *directDownloadCookies;
 
-    const char *checksum;
-    uint32_t checksumTypeId;
+    const char *checksumHeader;
 
     struct {
         int64_t     size;
@@ -304,8 +307,8 @@ typedef int (*csync_vio_stat_hook) (csync_vio_handle_t *dhhandle,
                                                               void *userdata);
 
 /* Compute the checksum of the given \a checksumTypeId for \a path. */
-typedef const char* (*csync_checksum_hook) (
-        const char *path, uint32_t checksumTypeId, void *userdata);
+typedef const char *(*csync_checksum_hook)(
+    const char *path, const char *otherChecksumHeader, void *userdata);
 
 /**
  * @brief Allocate a csync context.
index a10b09f6f3ad547b5477888538fcce773ee27180..11775f345db8971841d5f5bf3a48de3c7699ab01 100644 (file)
@@ -191,8 +191,11 @@ struct csync_file_stat_s {
   char *directDownloadCookies;
   char remotePerm[REMOTE_PERM_BUF_SIZE+1];
 
-  const char *checksum;
-  uint32_t checksumTypeId;
+  // In the local tree, this can hold a checksum and its type if it is
+  //   computed during discovery for some reason.
+  // In the remote tree, this will have the server checksum, if available.
+  // In both cases, the format is "SHA1:baff".
+  const char *checksumHeader;
 
   CSYNC_STATUS error_status;
 
index 9d716a80c280ffd34b80c3f3b20fcad7f5e1d5d2..59156e800ab0b725beb59475cd4f0e70bd6bea6e 100644 (file)
@@ -65,6 +65,19 @@ static c_rbnode_t *_csync_check_ignored(c_rbtree_t *tree, const char *path, int
     }
 }
 
+/* Returns true if we're reasonably certain that hash equality
+ * for the header means content equality.
+ *
+ * Cryptographic safety is not required - this is mainly
+ * intended to rule out checksums like Adler32 that are not intended for
+ * hashing and have high likelihood of collision with particular inputs.
+ */
+static bool _csync_is_collision_safe_hash(const char *checksum_header)
+{
+    return strncmp(checksum_header, "SHA1:", 5) == 0
+        || strncmp(checksum_header, "MD5:", 4) == 0;
+}
+
 /**
  * The main function in the reconcile pass.
  *
@@ -272,11 +285,31 @@ static int _csync_merge_algorithm_visitor(void *obj, void *data) {
                     // Folders of the same path are always considered equals
                     is_conflict = false;
                 } else {
+                    // If the size or mtime is different, it's definitely a conflict.
                     is_conflict = ((other->size != cur->size) || (other->modtime != cur->modtime));
-                    // FIXME: do a binary comparision of the file here because of the following
-                    // edge case:
-                    // The files could still have different content, even though the mtime
-                    // and size are the same.
+
+                    // It could be a conflict even if size and mtime match!
+                    //
+                    // In older client versions we always treated these cases as a
+                    // non-conflict. This behavior is preserved in case the server
+                    // doesn't provide a suitable content hash.
+                    //
+                    // When it does have one, however, we do create a job, but the job
+                    // will compare hashes and avoid the download if they are equal.
+                    const char *remoteChecksumHeader =
+                        (ctx->current == REMOTE_REPLICA ? cur->checksumHeader : other->checksumHeader);
+                    if (remoteChecksumHeader) {
+                        is_conflict |= _csync_is_collision_safe_hash(remoteChecksumHeader);
+                    }
+
+                    // SO: If there is no checksum, we can have !is_conflict here
+                    // even though the files have different content! This is an
+                    // intentional tradeoff. Downloading and comparing files would
+                    // be technically correct in this situation but leads to too
+                    // much waste.
+                    // In particular this kind of NEW/NEW situation with identical
+                    // sizes and mtimes pops up when the local database is lost for
+                    // whatever reason.
                 }
                 if (ctx->current == REMOTE_REPLICA) {
                     // If the files are considered equal, only update the DB with the etag from remote
index 9b87985a8d8867a499152d8e89e417db765e3b47..056ca48dd6b1d52873dfc8d37ada877656744a9a 100644 (file)
@@ -225,7 +225,12 @@ int csync_statedb_close(CSYNC *ctx) {
   return rc;
 }
 
-#define METADATA_COLUMNS "phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize, ignoredChildrenRemote, contentChecksum, contentChecksumTypeId"
+#define METADATA_QUERY                                                                      \
+    "phash, pathlen, path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, " \
+    "filesize, ignoredChildrenRemote, "                                                     \
+    "contentchecksumtype.name || ':' || contentChecksum "                                   \
+    "FROM metadata "                                                                        \
+    "LEFT JOIN checksumtype as contentchecksumtype ON metadata.contentChecksumTypeId == contentchecksumtype.id"
 
 // This funciton parses a line from the metadata table into the given csync_file_stat
 // structure which it is also allocating.
@@ -287,9 +292,8 @@ static int _csync_file_stat_from_metadata_table( csync_file_stat_t **st, sqlite3
             if(column_count > 13) {
                 (*st)->has_ignored_files = sqlite3_column_int(stmt, 13);
             }
-            if(column_count > 15 && sqlite3_column_int(stmt, 15)) {
-                (*st)->checksum = c_strdup( (char*) sqlite3_column_text(stmt, 14));
-                (*st)->checksumTypeId = sqlite3_column_int(stmt, 15);
+            if (column_count > 14 && sqlite3_column_text(stmt, 14)) {
+                (*st)->checksumHeader = c_strdup((char *)sqlite3_column_text(stmt, 14));
             }
 
         }
@@ -313,7 +317,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_hash(CSYNC *ctx,
   }
 
   if( ctx->statedb.by_hash_stmt == NULL ) {
-      const char *hash_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE phash=?1";
+      const char *hash_query = "SELECT " METADATA_QUERY " WHERE phash=?1";
 
       SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, hash_query, strlen(hash_query), &ctx->statedb.by_hash_stmt, NULL));
       ctx->statedb.lastReturnValue = rc;
@@ -356,7 +360,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_file_id(CSYNC *ctx,
     }
 
     if( ctx->statedb.by_fileid_stmt == NULL ) {
-        const char *query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE fileid=?1";
+        const char *query = "SELECT " METADATA_QUERY " WHERE fileid=?1";
 
         SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, query, strlen(query), &ctx->statedb.by_fileid_stmt, NULL));
         ctx->statedb.lastReturnValue = rc;
@@ -396,7 +400,7 @@ csync_file_stat_t *csync_statedb_get_stat_by_inode(CSYNC *ctx,
   }
 
   if( ctx->statedb.by_inode_stmt == NULL ) {
-      const char *inode_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE inode=?1";
+      const char *inode_query = "SELECT " METADATA_QUERY " WHERE inode=?1";
 
       SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, inode_query, strlen(inode_query), &ctx->statedb.by_inode_stmt, NULL));
       ctx->statedb.lastReturnValue = rc;
@@ -439,7 +443,7 @@ int csync_statedb_get_below_path( CSYNC *ctx, const char *path ) {
      * In other words, anything that is between  path+'/' and path+'0',
      * (because '0' follows '/' in ascii)
      */
-    const char *below_path_query = "SELECT " METADATA_COLUMNS " FROM metadata WHERE path > (?||'/') AND path < (?||'0') ORDER BY path||'/' ASC";
+    const char *below_path_query = "SELECT " METADATA_QUERY " WHERE path > (?||'/') AND path < (?||'0') ORDER BY path||'/' ASC";
     SQLITE_BUSY_HANDLED(sqlite3_prepare_v2(ctx->statedb.db, below_path_query, -1, &stmt, NULL));
     ctx->statedb.lastReturnValue = rc;
     if( rc != SQLITE_OK ) {
index 1951529127497f6b7605ac2089024d9b00a4ac27..d7587765c3b1457238d89dfd22c709686063fd49 100644 (file)
@@ -287,16 +287,15 @@ static int _csync_detect_update(CSYNC *ctx, const char *file,
             // Checksum comparison at this stage is only enabled for .eml files,
             // check #4754 #4755
             bool isEmlFile = csync_fnmatch("*.eml", file, FNM_CASEFOLD) == 0;
-            if (isEmlFile && fs->size == tmp->size && tmp->checksumTypeId) {
+            if (isEmlFile && fs->size == tmp->size && tmp->checksumHeader) {
                 if (ctx->callbacks.checksum_hook) {
-                    st->checksum = ctx->callbacks.checksum_hook(
-                                file, tmp->checksumTypeId,
-                                ctx->callbacks.checksum_userdata);
+                    st->checksumHeader = ctx->callbacks.checksum_hook(
+                        file, tmp->checksumHeader,
+                        ctx->callbacks.checksum_userdata);
                 }
                 bool checksumIdentical = false;
-                if (st->checksum) {
-                    st->checksumTypeId = tmp->checksumTypeId;
-                    checksumIdentical = strncmp(st->checksum, tmp->checksum, 1000) == 0;
+                if (st->checksumHeader) {
+                    checksumIdentical = strncmp(st->checksumHeader, tmp->checksumHeader, 1000) == 0;
                 }
                 if (checksumIdentical) {
                     CSYNC_LOG(CSYNC_LOG_PRIORITY_TRACE, "NOTE: Checksums are identical, file did not actually change: %s", path);
@@ -381,15 +380,14 @@ static int _csync_detect_update(CSYNC *ctx, const char *file,
 
 
             // Verify the checksum where possible
-            if (isRename && tmp->checksumTypeId && ctx->callbacks.checksum_hook
-                    && fs->type == CSYNC_VIO_FILE_TYPE_REGULAR) {
-                st->checksum = ctx->callbacks.checksum_hook(
-                            file, tmp->checksumTypeId,
-                            ctx->callbacks.checksum_userdata);
-                if (st->checksum) {
-                    CSYNC_LOG(CSYNC_LOG_PRIORITY_TRACE, "checking checksum of potential rename %s %s <-> %s", path, st->checksum, tmp->checksum);
-                    st->checksumTypeId = tmp->checksumTypeId;
-                    isRename = strncmp(st->checksum, tmp->checksum, 1000) == 0;
+            if (isRename && tmp->checksumHeader && ctx->callbacks.checksum_hook
+                && fs->type == CSYNC_VIO_FILE_TYPE_REGULAR) {
+                st->checksumHeader = ctx->callbacks.checksum_hook(
+                    file, tmp->checksumHeader,
+                    ctx->callbacks.checksum_userdata);
+                if (st->checksumHeader) {
+                    CSYNC_LOG(CSYNC_LOG_PRIORITY_TRACE, "checking checksum of potential rename %s %s <-> %s", path, st->checksumHeader, tmp->checksumHeader);
+                    isRename = strncmp(st->checksumHeader, tmp->checksumHeader, 1000) == 0;
                 }
             }
 
@@ -506,6 +504,11 @@ out:
       strncpy(st->remotePerm, fs->remotePerm, REMOTE_PERM_BUF_SIZE);
   }
 
+  // For the remote: propagate the discovered checksum
+  if (fs->checksumHeader && ctx->current == REMOTE_REPLICA) {
+      st->checksumHeader = c_strdup(fs->checksumHeader);
+  }
+
   st->phash = h;
   st->pathlen = len;
   memcpy(st->path, (len ? path : ""), len + 1);
index bc935d375992b9df64cf8e59240b93db96bfcd45..8ebb64f3370167a7b17fbf161448cbc48ef7b52e 100644 (file)
@@ -40,6 +40,9 @@ csync_vio_file_stat_t* csync_vio_file_stat_copy(csync_vio_file_stat_t *file_stat
     if (file_stat_cpy->directDownloadUrl) {
         file_stat_cpy->directDownloadUrl = c_strdup(file_stat_cpy->directDownloadUrl);
     }
+    if (file_stat_cpy->checksumHeader) {
+        file_stat_cpy->checksumHeader = c_strdup(file_stat_cpy->checksumHeader);
+    }
     file_stat_cpy->name = c_strdup(file_stat_cpy->name);
     return file_stat_cpy;
 }
@@ -57,6 +60,7 @@ void csync_vio_file_stat_destroy(csync_vio_file_stat_t *file_stat) {
   SAFE_FREE(file_stat->directDownloadCookies);
   SAFE_FREE(file_stat->name);
   SAFE_FREE(file_stat->original_name);
+  SAFE_FREE(file_stat->checksumHeader);
   SAFE_FREE(file_stat);
 }
 
index 00818442fb5a5c0336d39ed34c6125205be43251..eaf36379ccb152b14ae7246a7dc3d55b9f6014ab 100644 (file)
@@ -81,6 +81,8 @@ Q_LOGGING_CATEGORY(lcChecksums, "sync.checksums", QtInfoMsg)
 
 QByteArray makeChecksumHeader(const QByteArray &checksumType, const QByteArray &checksum)
 {
+    if (checksumType.isEmpty() || checksum.isEmpty())
+        return QByteArray();
     QByteArray header = checksumType;
     header.append(':');
     header.append(checksum);
@@ -105,6 +107,16 @@ bool parseChecksumHeader(const QByteArray &header, QByteArray *type, QByteArray
     return true;
 }
 
+
+QByteArray parseChecksumHeaderType(const QByteArray &header)
+{
+    const auto idx = header.indexOf(':');
+    if (idx < 0) {
+        return QByteArray();
+    }
+    return header.left(idx);
+}
+
 bool uploadChecksumEnabled()
 {
     static bool enabled = qgetenv("OWNCLOUD_DISABLE_CHECKSUM_UPLOAD").isEmpty();
@@ -214,39 +226,27 @@ void ValidateChecksumHeader::slotChecksumCalculated(const QByteArray &checksumTy
     emit validated(checksumType, checksum);
 }
 
-CSyncChecksumHook::CSyncChecksumHook(SyncJournalDb *journal)
-    : _journal(journal)
+CSyncChecksumHook::CSyncChecksumHook()
 {
 }
 
-const char *CSyncChecksumHook::hook(const char *path, uint32_t checksumTypeId, void *this_obj)
+const char *CSyncChecksumHook::hook(const char *path, const char *otherChecksumHeader, void * /*this_obj*/)
 {
-    CSyncChecksumHook *checksumHook = static_cast<CSyncChecksumHook *>(this_obj);
-    QByteArray checksum = checksumHook->compute(QString::fromUtf8(path), checksumTypeId);
+    QByteArray type = parseChecksumHeaderType(QByteArray(otherChecksumHeader));
+    if (type.isEmpty())
+        return NULL;
+
+    QByteArray checksum = ComputeChecksum::computeNow(path, type);
     if (checksum.isNull()) {
+        qCWarning(lcChecksums) << "Failed to compute checksum" << type << "for" << path;
         return NULL;
     }
 
-    char *result = (char *)malloc(checksum.size() + 1);
-    memcpy(result, checksum.constData(), checksum.size());
-    result[checksum.size()] = 0;
+    QByteArray checksumHeader = makeChecksumHeader(type, checksum);
+    char *result = (char *)malloc(checksumHeader.size() + 1);
+    memcpy(result, checksumHeader.constData(), checksumHeader.size());
+    result[checksumHeader.size()] = 0;
     return result;
 }
 
-QByteArray CSyncChecksumHook::compute(const QString &path, int checksumTypeId)
-{
-    QByteArray checksumType = _journal->getChecksumType(checksumTypeId);
-    if (checksumType.isEmpty()) {
-        qCWarning(lcChecksums) << "Checksum type" << checksumTypeId << "not found";
-        return QByteArray();
-    }
-
-    QByteArray checksum = ComputeChecksum::computeNow(path, checksumType);
-    if (checksum.isNull()) {
-        qCWarning(lcChecksums) << "Failed to compute checksum" << checksumType << "for" << path;
-        return QByteArray();
-    }
-
-    return checksum;
-}
 }
index 5d88b0bcb6868b4b1598e0e7c9eafc7059eed462..a7259f7eccb16e66c209cb10b4685855ff30bf64 100644 (file)
@@ -31,6 +31,9 @@ QByteArray makeChecksumHeader(const QByteArray &checksumType, const QByteArray &
 /// Parses a checksum header
 bool parseChecksumHeader(const QByteArray &header, QByteArray *type, QByteArray *checksum);
 
+/// Convenience for getting the type from a checksum header, null if none
+QByteArray parseChecksumHeaderType(const QByteArray &header);
+
 /// Checks OWNCLOUD_DISABLE_CHECKSUM_UPLOAD
 bool uploadChecksumEnabled();
 
@@ -119,20 +122,15 @@ class OWNCLOUDSYNC_EXPORT CSyncChecksumHook : public QObject
 {
     Q_OBJECT
 public:
-    explicit CSyncChecksumHook(SyncJournalDb *journal);
+    explicit CSyncChecksumHook();
 
     /**
-     * Returns the checksum value for \a path for the given \a checksumTypeId.
+     * Returns the checksum value for \a path that is comparable to \a otherChecksum.
      *
      * Called from csync, where a instance of CSyncChecksumHook has
      * to be set as userdata.
      * The return value will be owned by csync.
      */
-    static const char *hook(const char *path, uint32_t checksumTypeId, void *this_obj);
-
-    QByteArray compute(const QString &path, int checksumTypeId);
-
-private:
-    SyncJournalDb *_journal;
+    static const char *hook(const char *path, const char *otherChecksumHeader, void *this_obj);
 };
 }
index c8f13784be060cb11489d25a0b66838fa3253b45..16b98faeb2c2da9e5d5d8ceb438e4920c25d6870 100644 (file)
@@ -272,7 +272,8 @@ void DiscoverySingleDirectoryJob::start()
           << "http://owncloud.org/ns:id"
           << "http://owncloud.org/ns:downloadURL"
           << "http://owncloud.org/ns:dDC"
-          << "http://owncloud.org/ns:permissions";
+          << "http://owncloud.org/ns:permissions"
+          << "http://owncloud.org/ns:checksums";
     if (_isRootPath)
         props << "http://owncloud.org/ns:data-fingerprint";
 
@@ -294,6 +295,28 @@ void DiscoverySingleDirectoryJob::abort()
     }
 }
 
+/**
+ * Returns the highest-quality checksum in a 'checksums'
+ * property retrieved from the server.
+ *
+ * Example: "ADLER32:1231 SHA1:ab124124 MD5:2131affa21"
+ *       -> "SHA1:ab124124"
+ */
+static QByteArray findBestChecksum(const QByteArray &checksums)
+{
+    int i = 0;
+    // The order of the searches here defines the preference ordering.
+    if (-1 != (i = checksums.indexOf("SHA1:"))
+        || -1 != (i = checksums.indexOf("MD5:"))
+        || -1 != (i = checksums.indexOf("Adler32:"))) {
+        // Now i is the start of the best checksum
+        // Grab it until the next space or end of string.
+        auto checksum = checksums.mid(i);
+        return checksum.mid(0, checksum.indexOf(" "));
+    }
+    return QByteArray();
+}
+
 static csync_vio_file_stat_t *propertyMapToFileStat(const QMap<QString, QString> &map)
 {
     csync_vio_file_stat_t *file_stat = csync_vio_file_stat_new();
@@ -343,6 +366,11 @@ static csync_vio_file_stat_t *propertyMapToFileStat(const QMap<QString, QString>
             } else {
                 qCWarning(lcDiscovery) << "permissions too large" << v;
             }
+        } else if (property == "checksums") {
+            QByteArray checksum = findBestChecksum(value.toUtf8());
+            if (!checksum.isEmpty()) {
+                file_stat->checksumHeader = strdup(checksum.constData());
+            }
         }
     }
 
index e669369087b2eae7ef2d0662a1536263a7cd4185..8cc87ccac9be8b7fc8cbd548208eb9e7e106e1cd 100644 (file)
@@ -344,6 +344,41 @@ void PropagateDownloadFile::start()
         }
     }
 
+    // If we have a conflict where size and mtime are identical,
+    // compare the remote checksum to the local one.
+    // Maybe it's not a real conflict and no download is necessary!
+    if (_item->_instruction == CSYNC_INSTRUCTION_CONFLICT
+        && _item->_size == _item->log._other_size
+        && _item->_modtime == _item->log._other_modtime
+        && !_item->_checksumHeader.isEmpty()) {
+        qCDebug(lcPropagateDownload) << _item->_file << "may not need download, computing checksum";
+        auto computeChecksum = new ComputeChecksum(this);
+        computeChecksum->setChecksumType(parseChecksumHeaderType(_item->_checksumHeader));
+        connect(computeChecksum, SIGNAL(done(QByteArray, QByteArray)),
+            SLOT(conflictChecksumComputed(QByteArray, QByteArray)));
+        computeChecksum->start(propagator()->getFilePath(_item->_file));
+        return;
+    }
+
+    startDownload();
+}
+
+void PropagateDownloadFile::conflictChecksumComputed(const QByteArray &checksumType, const QByteArray &checksum)
+{
+    if (makeChecksumHeader(checksumType, checksum) == _item->_checksumHeader) {
+        qCDebug(lcPropagateDownload) << _item->_file << "remote and local checksum match";
+        // No download necessary, just update metadata
+        updateMetadata(/*isConflict=*/false);
+        return;
+    }
+    startDownload();
+}
+
+void PropagateDownloadFile::startDownload()
+{
+    if (propagator()->_abortRequested.fetchAndAddRelaxed(0))
+        return;
+
     // do a klaas' case clash check.
     if (propagator()->localFileNameClash(_item->_file)) {
         done(SyncFileItem::NormalError, tr("File %1 can not be downloaded because of a local file name clash!").arg(QDir::toNativeSeparators(_item->_file)));
@@ -672,7 +707,7 @@ namespace { // Anonymous namespace for the recall feature
                 continue;
             }
 
-            qCInfo(lcPropagateDownload) << "Recalling" << localRecalledFile << "Checksum:" << record._contentChecksumType << record._contentChecksum;
+            qCInfo(lcPropagateDownload) << "Recalling" << localRecalledFile << "Checksum:" << record._checksumHeader;
 
             QString targetPath = makeRecallFileName(recalledFile);
 
@@ -717,8 +752,7 @@ void PropagateDownloadFile::transmissionChecksumValidated(const QByteArray &chec
 
 void PropagateDownloadFile::contentChecksumComputed(const QByteArray &checksumType, const QByteArray &checksum)
 {
-    _item->_contentChecksum = checksum;
-    _item->_contentChecksumType = checksumType;
+    _item->_checksumHeader = makeChecksumHeader(checksumType, checksum);
 
     downloadFinished();
 }
@@ -827,6 +861,13 @@ void PropagateDownloadFile::downloadFinished()
     // Get up to date information for the journal.
     _item->_size = FileSystem::getSize(fn);
 
+    updateMetadata(isConflict);
+}
+
+void PropagateDownloadFile::updateMetadata(bool isConflict)
+{
+    QString fn = propagator()->getFilePath(_item->_file);
+
     if (!propagator()->_journal->setFileRecord(SyncJournalFileRecord(*_item, fn))) {
         done(SyncFileItem::FatalError, tr("Error writing metadata to the database"));
         return;
index 69962131b5eb0232940984751a60645990cb455a..26361541c20b797059360e906f7c48e77e31f352 100644 (file)
@@ -106,6 +106,40 @@ private slots:
 /**
  * @brief The PropagateDownloadFile class
  * @ingroup libsync
+ *
+ * This is the flow:
+
+\code{.unparsed}
+  start()
+    |
+    | deleteExistingFolder() if enabled
+    |
+    +--> mtime and size identical?
+    |    then compute the local checksum
+    |                               done?-> conflictChecksumComputed()
+    |                                              |
+    |                         checksum differs?    |
+    +-> startDownload() <--------------------------+
+          |                                        |
+          +-> run a GETFileJob                     | checksum identical?
+                                                   |
+      done?-> slotGetFinished()                    |
+                |                                  |
+                +-> validate checksum header       |
+                                                   |
+      done?-> transmissionChecksumValidated()      |
+                |                                  |
+                +-> compute the content checksum   |
+                                                   |
+      done?-> contentChecksumComputed()            |
+                |                                  |
+                +-> downloadFinished()             |
+                       |                           |
+    +------------------+                           |
+    |                                              |
+    +-> updateMetadata() <-------------------------+
+
+\endcode
  */
 class PropagateDownloadFile : public PropagateItemJob
 {
@@ -136,11 +170,22 @@ public:
     void setDeleteExistingFolder(bool enabled);
 
 private slots:
+    /// Called when ComputeChecksum on the local file finishes,
+    /// maybe the local and remote checksums are identical?
+    void conflictChecksumComputed(const QByteArray &checksumType, const QByteArray &checksum);
+    /// Called to start downloading the remote file
+    void startDownload();
+    /// Called when the GETFileJob finishes
     void slotGetFinished();
-    void abort() Q_DECL_OVERRIDE;
+    /// Called when the download's checksum header was validated
     void transmissionChecksumValidated(const QByteArray &checksumType, const QByteArray &checksum);
+    /// Called when the download's checksum computation is done
     void contentChecksumComputed(const QByteArray &checksumType, const QByteArray &checksum);
     void downloadFinished();
+    /// Called when it's time to update the db metadata
+    void updateMetadata(bool isConflict);
+
+    void abort() Q_DECL_OVERRIDE;
     void slotDownloadProgress(qint64, qint64);
     void slotChecksumFail(const QString &errMsg);
 
index 7f4f96c33c2dbff49a72ca10f98f49c5096aff92..69979b1a97b428aef66d9c8001c9fb9a5b88138c 100644 (file)
@@ -173,8 +173,7 @@ void PropagateRemoteMove::finalize()
     SyncJournalFileRecord record(*_item, propagator()->getFilePath(_item->_renameTarget));
     record._path = _item->_renameTarget;
     if (oldRecord.isValid()) {
-        record._contentChecksum = oldRecord._contentChecksum;
-        record._contentChecksumType = oldRecord._contentChecksumType;
+        record._checksumHeader = oldRecord._checksumHeader;
         if (record._fileSize != oldRecord._fileSize) {
             qCWarning(lcPropagateRemoteMove) << "File sizes differ on server vs sync journal: " << record._fileSize << oldRecord._fileSize;
 
index 93849dff73c4a13c8ba9030920fbf14d15def3dc..f3b0e8336b77ee034961aaadaaaa3bb40de27b5b 100644 (file)
@@ -231,9 +231,10 @@ void PropagateUploadFileCommon::slotComputeContentChecksum()
     QByteArray checksumType = contentChecksumType();
 
     // Maybe the discovery already computed the checksum?
-    if (_item->_contentChecksumType == checksumType
-        && !_item->_contentChecksum.isEmpty()) {
-        slotComputeTransmissionChecksum(checksumType, _item->_contentChecksum);
+    QByteArray existingChecksumType, existingChecksum;
+    parseChecksumHeader(_item->_checksumHeader, &existingChecksumType, &existingChecksum);
+    if (existingChecksumType == checksumType) {
+        slotComputeTransmissionChecksum(checksumType, existingChecksum);
         return;
     }
 
@@ -250,8 +251,7 @@ void PropagateUploadFileCommon::slotComputeContentChecksum()
 
 void PropagateUploadFileCommon::slotComputeTransmissionChecksum(const QByteArray &contentChecksumType, const QByteArray &contentChecksum)
 {
-    _item->_contentChecksum = contentChecksum;
-    _item->_contentChecksumType = contentChecksumType;
+    _item->_checksumHeader = makeChecksumHeader(contentChecksumType, contentChecksum);
 
 #ifdef WITH_TESTING
     _stopWatch.addLapTime(QLatin1String("ContentChecksum"));
@@ -288,13 +288,11 @@ void PropagateUploadFileCommon::slotStartUpload(const QByteArray &transmissionCh
     // When we start chunks, we will add it again, once for every chunks.
     propagator()->_activeJobList.removeOne(this);
 
-    _transmissionChecksum = transmissionChecksum;
-    _transmissionChecksumType = transmissionChecksumType;
+    _transmissionChecksumHeader = makeChecksumHeader(transmissionChecksumType, transmissionChecksum);
 
-    if (_item->_contentChecksum.isEmpty() && _item->_contentChecksumType.isEmpty()) {
-        // If the _contentChecksum was not set, reuse the transmission checksum as the content checksum.
-        _item->_contentChecksum = transmissionChecksum;
-        _item->_contentChecksumType = transmissionChecksumType;
+    // If no checksum header was not set, reuse the transmission checksum as the content checksum.
+    if (_item->_checksumHeader.isEmpty()) {
+        _item->_checksumHeader = _transmissionChecksumHeader;
     }
 
     const QString fullFilePath = propagator()->getFilePath(_item->_file);
index e9130d9340b951bdb60bb92ff916b5920efff2b6..5e16b5ff9842a2f6cadce8d5dc6d6e28143df3d2 100644 (file)
@@ -226,8 +226,7 @@ protected:
     Utility::StopWatch _stopWatch;
 #endif
 
-    QByteArray _transmissionChecksum;
-    QByteArray _transmissionChecksumType;
+    QByteArray _transmissionChecksumHeader;
 
 public:
     PropagateUploadFileCommon(OwncloudPropagator *propagator, const SyncFileItemPtr &item)
index d6c213389d3d503a6f7bf7d55eb547cce7b4b81c..45768102da69d076e76ea0a8dbfde00169cc5fcb 100644 (file)
@@ -283,9 +283,8 @@ void PropagateUploadFileNG::startNextChunk()
         if (!ifMatch.isEmpty()) {
             headers["If"] = "<" + destination.toUtf8() + "> ([" + ifMatch + "])";
         }
-        if (!_transmissionChecksumType.isEmpty()) {
-            headers[checkSumHeaderC] = makeChecksumHeader(
-                _transmissionChecksumType, _transmissionChecksum);
+        if (!_transmissionChecksumHeader.isEmpty()) {
+            headers[checkSumHeaderC] = _transmissionChecksumHeader;
         }
 
         headers["OC-Total-Length"] = QByteArray::number(fileSize);
index f8a88020ddc61107556a2a27654828323faba5ee..b48b433ab752f7a7ccff8a98f5c3b851bce45ce9 100644 (file)
@@ -103,9 +103,8 @@ void PropagateUploadFileV1::startNextChunk()
     }
     qCDebug(lcPropagateUpload) << _chunkCount << isFinalChunk << chunkStart << currentChunkSize;
 
-    if (isFinalChunk && !_transmissionChecksumType.isEmpty()) {
-        headers[checkSumHeaderC] = makeChecksumHeader(
-            _transmissionChecksumType, _transmissionChecksum);
+    if (isFinalChunk && !_transmissionChecksumHeader.isEmpty()) {
+        headers[checkSumHeaderC] = _transmissionChecksumHeader;
     }
 
     const QString fileName = propagator()->getFilePath(_item->_file);
index bda94fa11f2d735d3038b7ab874704612b75c766..5cf17b26acb1f2bfd089721530f3c7ee8202bd93 100644 (file)
@@ -241,8 +241,7 @@ void PropagateLocalRename::start()
     SyncJournalFileRecord record(*_item, targetFile);
     record._path = _item->_renameTarget;
     if (oldRecord.isValid()) {
-        record._contentChecksum = oldRecord._contentChecksum;
-        record._contentChecksumType = oldRecord._contentChecksumType;
+        record._checksumHeader = oldRecord._checksumHeader;
     }
 
     if (!_item->_isDirectory) { // Directories are saved at the end
index 00a795976f98b9f215a985cb2d2f2043306a681d..d65646ff8b4185d5e103903ac651e94db53011b9 100644 (file)
@@ -73,7 +73,6 @@ SyncEngine::SyncEngine(AccountPtr account, const QString &localPath,
     , _backInTimeFiles(0)
     , _uploadLimit(0)
     , _downloadLimit(0)
-    , _checksum_hook(journal)
     , _anotherSyncNeeded(NoFollowUpSync)
 {
     qRegisterMetaType<SyncFileItem>("SyncFileItem");
@@ -426,9 +425,12 @@ int SyncEngine::treewalkFile(TREE_WALK_FILE *file, bool remote)
     }
 
     // Sometimes the discovery computes checksums for local files
-    if (!remote && file->checksum && file->checksumTypeId) {
-        item->_contentChecksum = QByteArray(file->checksum);
-        item->_contentChecksumType = _journal->getChecksumType(file->checksumTypeId);
+    if (!remote && file->checksumHeader) {
+        item->_checksumHeader = QByteArray(file->checksumHeader);
+    }
+    // For conflicts, store the remote checksum there
+    if (remote && item->_instruction == CSYNC_INSTRUCTION_CONFLICT && file->checksumHeader) {
+        item->_checksumHeader = QByteArray(file->checksumHeader);
     }
 
     // record the seen files to be able to clean the journal later
index 6273dd5c966b3bea465dd0e23aa980bdc68ecb54..3eefac5cee2f6ce4a0e1d1fb42bf939b667cdb1e 100644 (file)
@@ -190,8 +190,13 @@ public:
     quint64 _inode;
     QByteArray _fileId;
     QByteArray _remotePerm;
-    QByteArray _contentChecksum;
-    QByteArray _contentChecksumType;
+
+    // When is this set, and is it the local or the remote checksum?
+    // - if mtime or size changed locally for *.eml files (local checksum)
+    // - for potential renames of local files (local checksum)
+    // - for conflicts (remote checksum) (what about eval_rename/new reconcile?)
+    QByteArray _checksumHeader;
+
     QString _directDownloadUrl;
     QString _directDownloadCookies;
 
index c6b67fd067995d63e5a3b88c3c63532399079f18..3406c7e357219e651a26f3e53a7c380f59a70ad1 100644 (file)
@@ -28,6 +28,7 @@
 #include "version.h"
 #include "filesystem.h"
 #include "asserts.h"
+#include "checksums.h"
 
 #include "../../csync/src/std/c_jhash.h"
 
@@ -448,7 +449,7 @@ bool SyncJournalDb::checkConnect()
     _getFileRecordQuery.reset(new SqlQuery(_db));
     if (_getFileRecordQuery->prepare(
             "SELECT path, inode, uid, gid, mode, modtime, type, md5, fileid, remotePerm, filesize,"
-            "  ignoredChildrenRemote, contentChecksum, contentchecksumtype.name"
+            "  ignoredChildrenRemote, contentchecksumtype.name || ':' || contentChecksum"
             " FROM metadata"
             "  LEFT JOIN checksumtype as contentchecksumtype ON metadata.contentChecksumTypeId == contentchecksumtype.id"
             " WHERE phash=?1")) {
@@ -832,7 +833,7 @@ bool SyncJournalDb::setFileRecord(const SyncJournalFileRecord &_record)
     qCInfo(lcDb) << "Updating file record for path:" << record._path << "inode:" << record._inode
                  << "modtime:" << record._modtime << "type:" << record._type
                  << "etag:" << record._etag << "fileId:" << record._fileId << "remotePerm:" << record._remotePerm
-                 << "fileSize:" << record._fileSize << "checksum:" << record._contentChecksum << record._contentChecksumType;
+                 << "fileSize:" << record._fileSize << "checksum:" << record._checksumHeader;
 
     qlonglong phash = getPHash(record._path);
     if (checkConnect()) {
@@ -848,7 +849,9 @@ bool SyncJournalDb::setFileRecord(const SyncJournalFileRecord &_record)
         QString remotePerm(record._remotePerm);
         if (remotePerm.isEmpty())
             remotePerm = QString(); // have NULL in DB (vs empty)
-        int contentChecksumTypeId = mapChecksumType(record._contentChecksumType);
+        QByteArray checksumType, checksum;
+        parseChecksumHeader(record._checksumHeader, &checksumType, &checksum);
+        int contentChecksumTypeId = mapChecksumType(checksumType);
         _setFileRecordQuery->reset_and_clear_bindings();
         _setFileRecordQuery->bindValue(1, QString::number(phash));
         _setFileRecordQuery->bindValue(2, plen);
@@ -864,7 +867,7 @@ bool SyncJournalDb::setFileRecord(const SyncJournalFileRecord &_record)
         _setFileRecordQuery->bindValue(12, remotePerm);
         _setFileRecordQuery->bindValue(13, record._fileSize);
         _setFileRecordQuery->bindValue(14, record._serverHasIgnoredFiles ? 1 : 0);
-        _setFileRecordQuery->bindValue(15, record._contentChecksum);
+        _setFileRecordQuery->bindValue(15, checksum);
         _setFileRecordQuery->bindValue(16, contentChecksumTypeId);
 
         if (!_setFileRecordQuery->exec()) {
@@ -943,10 +946,7 @@ SyncJournalFileRecord SyncJournalDb::getFileRecord(const QString &filename)
             rec._remotePerm = _getFileRecordQuery->baValue(9);
             rec._fileSize = _getFileRecordQuery->int64Value(10);
             rec._serverHasIgnoredFiles = (_getFileRecordQuery->intValue(11) > 0);
-            rec._contentChecksum = _getFileRecordQuery->baValue(12);
-            if (!_getFileRecordQuery->nullValue(13)) {
-                rec._contentChecksumType = _getFileRecordQuery->baValue(13);
-            }
+            rec._checksumHeader = _getFileRecordQuery->baValue(12);
             _getFileRecordQuery->reset_and_clear_bindings();
         } else {
             int errId = _getFileRecordQuery->errorId();
index 6937789145ee670023a092ac65ab3688b3eba2a8..b2c65931d40d63faad457e7de270dde4bf4736e1 100644 (file)
@@ -47,8 +47,7 @@ SyncJournalFileRecord::SyncJournalFileRecord(const SyncFileItem &item, const QSt
     , _fileSize(item._size)
     , _remotePerm(item._remotePerm)
     , _serverHasIgnoredFiles(item._serverHasIgnoredFiles)
-    , _contentChecksum(item._contentChecksum)
-    , _contentChecksumType(item._contentChecksumType)
+    , _checksumHeader(item._checksumHeader)
 {
     // use the "old" inode coming with the item for the case where the
     // filesystem stat fails. That can happen if the the file was removed
@@ -106,8 +105,7 @@ SyncFileItem SyncJournalFileRecord::toSyncFileItem()
     item._size = _fileSize;
     item._remotePerm = _remotePerm;
     item._serverHasIgnoredFiles = _serverHasIgnoredFiles;
-    item._contentChecksum = _contentChecksum;
-    item._contentChecksumType = _contentChecksumType;
+    item._checksumHeader = _checksumHeader;
     return item;
 }
 
@@ -130,7 +128,6 @@ bool operator==(const SyncJournalFileRecord &lhs,
         && lhs._fileSize == rhs._fileSize
         && lhs._remotePerm == rhs._remotePerm
         && lhs._serverHasIgnoredFiles == rhs._serverHasIgnoredFiles
-        && lhs._contentChecksum == rhs._contentChecksum
-        && lhs._contentChecksumType == rhs._contentChecksumType;
+        && lhs._checksumHeader == rhs._checksumHeader;
 }
 }
index f96e85b3baff673084d64bd36b7a997b7a50f87a..c7579683b978ccbd56a7221e408a3f5368074c0f 100644 (file)
@@ -57,8 +57,7 @@ public:
     qint64 _fileSize;
     QByteArray _remotePerm;
     bool _serverHasIgnoredFiles;
-    QByteArray _contentChecksum;
-    QByteArray _contentChecksumType;
+    QByteArray _checksumHeader;
 };
 
 bool OWNCLOUDSYNC_EXPORT
index 4200d5e1b3d4e17b8c841b582e5d0b1979e3904f..1f48931da1e8f2cf26242a2196983ee1ae3b12b2 100644 (file)
@@ -321,6 +321,70 @@ private slots:
         }
     }
 
+    void testFakeConflict()
+    {
+        FakeFolder fakeFolder{ FileInfo::A12_B12_C12_S12() };
+
+        int nGET = 0;
+        fakeFolder.setServerOverride([&](QNetworkAccessManager::Operation op, const QNetworkRequest &) {
+            if (op == QNetworkAccessManager::GetOperation)
+                ++nGET;
+            return nullptr;
+        });
+
+        // For directly editing the remote checksum
+        FileInfo &remoteInfo = dynamic_cast<FileInfo &>(fakeFolder.remoteModifier());
+
+        // Base mtime with no ms content (filesystem is seconds only)
+        auto mtime = QDateTime::currentDateTime().addDays(-4);
+        mtime.setMSecsSinceEpoch(mtime.toMSecsSinceEpoch() / 1000 * 1000);
+
+        // Conflict: Same content, mtime, but no server checksum
+        //           -> ignored in reconcile
+        fakeFolder.localModifier().setContents("A/a1", 'C');
+        fakeFolder.localModifier().setModTime("A/a1", mtime);
+        fakeFolder.remoteModifier().setContents("A/a1", 'C');
+        fakeFolder.remoteModifier().setModTime("A/a1", mtime);
+        QVERIFY(fakeFolder.syncOnce());
+        QCOMPARE(nGET, 0);
+
+        // Conflict: Same content, mtime, but weak server checksum
+        //           -> ignored in reconcile
+        mtime = mtime.addDays(1);
+        fakeFolder.localModifier().setContents("A/a1", 'D');
+        fakeFolder.localModifier().setModTime("A/a1", mtime);
+        fakeFolder.remoteModifier().setContents("A/a1", 'D');
+        fakeFolder.remoteModifier().setModTime("A/a1", mtime);
+        remoteInfo.find("A/a1")->checksums = "Adler32:bad";
+        QVERIFY(fakeFolder.syncOnce());
+        QCOMPARE(nGET, 0);
+
+        // Conflict: Same content, mtime, but server checksum differs
+        //           -> downloaded
+        mtime = mtime.addDays(1);
+        fakeFolder.localModifier().setContents("A/a1", 'W');
+        fakeFolder.localModifier().setModTime("A/a1", mtime);
+        fakeFolder.remoteModifier().setContents("A/a1", 'W');
+        fakeFolder.remoteModifier().setModTime("A/a1", mtime);
+        remoteInfo.find("A/a1")->checksums = "SHA1:bad";
+        QVERIFY(fakeFolder.syncOnce());
+        QCOMPARE(nGET, 1);
+
+        // Conflict: Same content, mtime, matching checksums
+        //           -> PropagateDownload, but it skips the download
+        mtime = mtime.addDays(1);
+        fakeFolder.localModifier().setContents("A/a1", 'C');
+        fakeFolder.localModifier().setModTime("A/a1", mtime);
+        fakeFolder.remoteModifier().setContents("A/a1", 'C');
+        fakeFolder.remoteModifier().setModTime("A/a1", mtime);
+        remoteInfo.find("A/a1")->checksums = "SHA1:56900fb1d337cf7237ff766276b9c1e8ce507427";
+        QVERIFY(fakeFolder.syncOnce());
+        QCOMPARE(nGET, 1);
+
+        // Extra sync reads from db, no difference
+        QVERIFY(fakeFolder.syncOnce());
+        QCOMPARE(nGET, 1);
+    }
 };
 
 QTEST_GUILESS_MAIN(TestSyncEngine)
index fa6021acc4232227bfe605668911ccb7eaf43ee5..ec673077dcaef271f50e4ff63b88dadb886acb95 100644 (file)
@@ -53,17 +53,15 @@ private slots:
         record._fileId = "abcd";
         record._remotePerm = "744";
         record._fileSize = 213089055;
-        record._contentChecksum = "mychecksum";
-        record._contentChecksumType = "MD5";
+        record._checksumHeader = "MD5:mychecksum";
         QVERIFY(_db.setFileRecord(record));
 
         SyncJournalFileRecord storedRecord = _db.getFileRecord("foo");
         QVERIFY(storedRecord == record);
 
         // Update checksum
-        record._contentChecksum = "newchecksum";
-        record._contentChecksumType = "Adler32";
-        _db.updateFileRecordChecksum("foo", record._contentChecksum, record._contentChecksumType);
+        record._checksumHeader = "Adler32:newchecksum";
+        _db.updateFileRecordChecksum("foo", "newchecksum", "Adler32");
         storedRecord = _db.getFileRecord("foo");
         QVERIFY(storedRecord == record);
 
@@ -91,16 +89,14 @@ private slots:
             SyncJournalFileRecord record;
             record._path = "foo-checksum";
             record._remotePerm = "744";
-            record._contentChecksum = "mychecksum";
-            record._contentChecksumType = "MD5";
+            record._checksumHeader = "MD5:mychecksum";
             record._modtime = QDateTime::currentDateTimeUtc();
             QVERIFY(_db.setFileRecord(record));
 
             SyncJournalFileRecord storedRecord = _db.getFileRecord("foo-checksum");
             QVERIFY(storedRecord._path == record._path);
             QVERIFY(storedRecord._remotePerm == record._remotePerm);
-            QVERIFY(storedRecord._contentChecksum == record._contentChecksum);
-            QVERIFY(storedRecord._contentChecksumType == record._contentChecksumType);
+            QVERIFY(storedRecord._checksumHeader == record._checksumHeader);
 
             // qDebug()<< "OOOOO " << storedRecord._modtime.toTime_t() << record._modtime.toTime_t();