Remove the usage of phash in csync
authorJocelyn Turcotte <jturcotte@woboq.com>
Wed, 23 Aug 2017 17:30:55 +0000 (19:30 +0200)
committerRoeland Jago Douma <roeland@famdouma.nl>
Thu, 5 Oct 2017 20:01:33 +0000 (22:01 +0200)
Only store the path since they represent the same thing, and do the
phash conversion during DB lookup like done in libsync.
We could get rid of everything since we also have an index on the path
column, but since it's the primary key this makes the migration non-trivial.

src/csync/csync.h
src/csync/csync_statedb.cpp
src/csync/csync_statedb.h
src/csync/csync_update.cpp
test/csync/csync_tests/check_csync_statedb_query.cpp
test/testcsyncsqlite.cpp

index 1bda232ff8da490e0ef162dbeeb3f44e8b9cc742..9eaba2ab4d1d080ea1c1df83e34cdeb11a0116d5 100644 (file)
@@ -160,7 +160,6 @@ enum csync_ftw_type_e {
 typedef struct csync_file_stat_s csync_file_stat_t;
 
 struct OCSYNC_EXPORT csync_file_stat_s {
-  uint64_t phash;
   time_t modtime;
   int64_t size;
   uint64_t inode;
@@ -189,8 +188,7 @@ struct OCSYNC_EXPORT csync_file_stat_s {
   enum csync_instructions_e instruction; /* u32 */
 
   csync_file_stat_s()
-    : phash(0)
-    , modtime(0)
+    : modtime(0)
     , size(0)
     , inode(0)
     , type(CSYNC_FTW_TYPE_SKIP)
index ca5b615379b7bd36e9a7f1f1e2606f7e02bb1586..7f7ed0c2906494987a8add83d85d1715cc45fc12 100644 (file)
@@ -231,7 +231,7 @@ int csync_statedb_close(CSYNC *ctx) {
 }
 
 #define METADATA_QUERY                                             \
-    "phash, path, inode, modtime, type, md5, fileid, remotePerm, " \
+    "path, inode, modtime, type, md5, fileid, remotePerm, " \
     "filesize, ignoredChildrenRemote, "                            \
     "contentchecksumtype.name || ':' || contentChecksum "          \
     "FROM metadata "                                               \
@@ -251,25 +251,23 @@ static int _csync_file_stat_from_metadata_table( std::unique_ptr<csync_file_stat
     }
 
     // Callers should all use METADATA_QUERY for their column list.
-    assert(sqlite3_column_count(stmt) == 11);
+    assert(sqlite3_column_count(stmt) == 10);
 
     SQLITE_BUSY_HANDLED( sqlite3_step(stmt) );
 
     if( rc == SQLITE_ROW ) {
         st.reset(new csync_file_stat_t);
 
-        /* The query suceeded so use the phash we pass to the function. */
-        st->phash = sqlite3_column_int64(stmt, 0);
-        st->path = (char*)sqlite3_column_text(stmt, 1);
-        st->inode = sqlite3_column_int64(stmt, 2);
-        st->modtime = strtoul((char*)sqlite3_column_text(stmt, 3), NULL, 10);
-        st->type = static_cast<enum csync_ftw_type_e>(sqlite3_column_int(stmt, 4));
-        st->etag = (char*)sqlite3_column_text(stmt, 5);
-        st->file_id = (char*)sqlite3_column_text(stmt, 6);
-        st->remotePerm = (char*)sqlite3_column_text(stmt, 7);
-        st->size = sqlite3_column_int64(stmt, 8);
-        st->has_ignored_files = sqlite3_column_int(stmt, 9);
-        st->checksumHeader = (char *)sqlite3_column_text(stmt, 10);
+        st->path = (char*)sqlite3_column_text(stmt, 0);
+        st->inode = sqlite3_column_int64(stmt, 1);
+        st->modtime = strtoul((char*)sqlite3_column_text(stmt, 2), NULL, 10);
+        st->type = static_cast<enum csync_ftw_type_e>(sqlite3_column_int(stmt, 3));
+        st->etag = (char*)sqlite3_column_text(stmt, 4);
+        st->file_id = (char*)sqlite3_column_text(stmt, 5);
+        st->remotePerm = (char*)sqlite3_column_text(stmt, 6);
+        st->size = sqlite3_column_int64(stmt, 7);
+        st->has_ignored_files = sqlite3_column_int(stmt, 8);
+        st->checksumHeader = (char *)sqlite3_column_text(stmt, 9);
     } else {
         if( rc != SQLITE_DONE ) {
             CSYNC_LOG(CSYNC_LOG_PRIORITY_WARN, "Query results in %d", rc);
@@ -279,8 +277,7 @@ static int _csync_file_stat_from_metadata_table( std::unique_ptr<csync_file_stat
 }
 
 /* caller must free the memory */
-std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_hash(CSYNC *ctx,
-                                                  uint64_t phash)
+std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_path(CSYNC *ctx, const QByteArray &path)
 {
   std::unique_ptr<csync_file_stat_t> st;
   int rc;
@@ -304,6 +301,7 @@ std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_hash(CSYNC *ctx,
     return NULL;
   }
 
+  uint64_t phash = c_jhash64((const uint8_t*)path.constData(), path.size(), 0);
   sqlite3_bind_int64(ctx->statedb.by_hash_stmt, 1, (long long signed int)phash);
 
   rc = _csync_file_stat_from_metadata_table(st, ctx->statedb.by_hash_stmt);
index 8ef2235af2f6c7ac79639870813a2eb3f820b14f..28c1cc8237216c8e806e990506ae81b72f544cb9 100644 (file)
@@ -56,7 +56,7 @@ OCSYNC_EXPORT int csync_statedb_load(CSYNC *ctx, const char *statedb, sqlite3 **
 
 OCSYNC_EXPORT int csync_statedb_close(CSYNC *ctx);
 
-OCSYNC_EXPORT std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_hash(CSYNC *ctx, uint64_t phash);
+OCSYNC_EXPORT std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_path(CSYNC *ctx, const QByteArray &path);
 
 OCSYNC_EXPORT std::unique_ptr<csync_file_stat_t> csync_statedb_get_stat_by_inode(CSYNC *ctx, uint64_t inode);
 
index f637572425a94acd71a9c43110e7b3cf957ab95b..96e13fd86bd8ac7d4bfcb45189772313d1daeb8b 100644 (file)
@@ -32,7 +32,6 @@
 #include <math.h>
 
 #include "c_lib.h"
-#include "c_jhash.h"
 
 #include "csync_private.h"
 #include "csync_exclude.h"
@@ -172,14 +171,14 @@ static int _csync_detect_update(CSYNC *ctx, std::unique_ptr<csync_file_stat_t> f
    * does not change on rename.
    */
   if (csync_get_statedb_exists(ctx)) {
-    tmp = csync_statedb_get_stat_by_hash(ctx, fs->phash);
+    tmp = csync_statedb_get_stat_by_path(ctx, fs->path);
 
     if(_last_db_return_error(ctx)) {
         ctx->status_code = CSYNC_STATUS_UNSUCCESSFUL;
         return -1;
     }
 
-    if(tmp && tmp->phash == fs->phash ) { /* there is an entry in the database */
+    if(tmp && tmp->path == fs->path ) { /* there is an entry in the database */
         /* we have an update! */
         CSYNC_LOG(CSYNC_LOG_PRIORITY_INFO, "Database entry found, compare: %" PRId64 " <-> %" PRId64
                                             ", etag: %s <-> %s, inode: %" PRId64 " <-> %" PRId64
@@ -594,8 +593,6 @@ int csync_ftw(CSYNC *ctx, const char *uri, csync_walker_fn fn,
         // "len + 1" to include the slash in-between.
         dirent->path = dirent->path.mid(strlen(ctx->local.uri) + 1);
     }
-    // We calculate the phash using the relative path.
-    dirent->phash = c_jhash64((const uint8_t*)dirent->path.constData(), dirent->path.size(), 0);
 
     previous_fs = ctx->current_fs;
     bool recurse = dirent->type == CSYNC_FTW_TYPE_DIR;
index 208e2c524706586deaf26dfa6c0ee6885eccebf5..7f3988a712f0fab2c55fecbd86f23565014bcff6 100644 (file)
@@ -155,7 +155,6 @@ static void check_csync_statedb_insert_metadata(void **state)
     for (i = 0; i < 100; i++) {
         st.reset(new csync_file_stat_t);
         st->path = QString("file_%1").arg(i).toUtf8();
-        st->phash = i;
 
         csync->local.files[st->path] = std::move(st);
     }
@@ -173,7 +172,6 @@ static void check_csync_statedb_write(void **state)
     for (i = 0; i < 100; i++) {
         st.reset(new csync_file_stat_t);
         st->path = QString("file_%1").arg(i).toUtf8();
-        st->phash = i;
 
         csync->local.files[st->path] = std::move(st);
         assert_int_equal(rc, 0);
@@ -184,12 +182,12 @@ static void check_csync_statedb_write(void **state)
 }
 
 
-static void check_csync_statedb_get_stat_by_hash_not_found(void **state)
+static void check_csync_statedb_get_stat_by_path_not_found(void **state)
 {
     CSYNC *csync = (CSYNC*)*state;
     std::unique_ptr<csync_file_stat_t> tmp;
 
-    tmp = csync_statedb_get_stat_by_hash(csync, (uint64_t) 666);
+    tmp = csync_statedb_get_stat_by_path(csync, "666");
     assert_null(tmp.get());
 }
 
@@ -210,7 +208,7 @@ int torture_run_tests(void)
         cmocka_unit_test_setup_teardown(check_csync_statedb_drop_tables, setup, teardown),
         cmocka_unit_test_setup_teardown(check_csync_statedb_insert_metadata, setup, teardown),
         cmocka_unit_test_setup_teardown(check_csync_statedb_write, setup, teardown),
-        cmocka_unit_test_setup_teardown(check_csync_statedb_get_stat_by_hash_not_found, setup_db, teardown),
+        cmocka_unit_test_setup_teardown(check_csync_statedb_get_stat_by_path_not_found, setup_db, teardown),
         cmocka_unit_test_setup_teardown(check_csync_statedb_get_stat_by_inode_not_found, setup_db, teardown),
     };
 
index 4a470e7b06fbbc52ce921411436fef53e86da4e6..093ee43cb04f4ec1934d29f66ea8865b7ba2761b 100644 (file)
@@ -27,9 +27,8 @@ private slots:
     }
 
     void testFullResult() {
-        std::unique_ptr<csync_file_stat_t> st = csync_statedb_get_stat_by_hash( _ctx, 2081025720555645157 );
+        std::unique_ptr<csync_file_stat_t> st = csync_statedb_get_stat_by_path( _ctx, "test2/zu/zuzu" );
         QVERIFY(st.get());
-        QCOMPARE( QString::number(st->phash), QString::number(2081025720555645157) );
         QCOMPARE( QString::fromUtf8(st->path), QLatin1String("test2/zu/zuzu") );
         QCOMPARE( QString::number(st->inode), QString::number(1709554));
         QCOMPARE( QString::number(st->modtime), QString::number(1384415006));
@@ -40,11 +39,11 @@ private slots:
     }
 
     void testByHash() {
-        std::unique_ptr<csync_file_stat_t> st = csync_statedb_get_stat_by_hash(_ctx, -7147279406142960289);
+        std::unique_ptr<csync_file_stat_t> st = csync_statedb_get_stat_by_path(_ctx, "documents/c1");
         QVERIFY(st.get());
         QCOMPARE(QString::fromUtf8(st->path), QLatin1String("documents/c1"));
 
-        st = csync_statedb_get_stat_by_hash(_ctx, 5426481156826978940);
+        st = csync_statedb_get_stat_by_path(_ctx, "documents/c1/c2");
         QVERIFY(st.get());
         QCOMPARE(QString::fromUtf8(st->path), QLatin1String("documents/c1/c2"));
     }