Handle Encoding Problems
author Olivier Goffart <ogoffart@woboq.com>
Tue, 10 Jul 2018 14:56:15 +0000 (16:56 +0200)
committer Kevin Ottens <kevin.ottens@nextcloud.com>
Tue, 15 Dec 2020 09:57:57 +0000 (10:57 +0100)
TestSyncEngine now passes

src/csync/csync_update.cpp
src/libsync/discovery.cpp
src/libsync/syncengine.cpp

index 4c121503b78adb64bce0665de0a2508ef7bb3bb4..dc383f05a78be7f9e71c4d3bc18939404275c08a 100644 (file)
@@ -148,21 +148,6 @@ static int _csync_detect_update(CSYNC *ctx, std::unique_ptr<csync_file_stat_t> f
       }
   }
 
-  auto localCodec = QTextCodec::codecForLocale();
-  if (ctx->current == REMOTE_REPLICA && localCodec->mibEnum() != 106) {
-      /* If the locale codec is not UTF-8, we must check that the filename from the server can
-       * be encoded in the local file system.
-       *
-       * We cannot use QTextCodec::canEncode() since that can incorrectly return true, see
-       * https://bugreports.qt.io/browse/QTBUG-6925.
-       */
-      QTextEncoder encoder(localCodec, QTextCodec::ConvertInvalidToNull);
-      if (encoder.fromUnicode(QString::fromUtf8(fs->path)).contains('\0')) {
-          qCInfo(lcUpdate, "cannot encode %s to local encoding %d",
-              fs->path.constData(), localCodec->mibEnum());
-          excluded = CSYNC_FILE_EXCLUDE_CANNOT_ENCODE;
-      }
-  }
 
   if (fs->type == ItemTypeFile ) {
     if (fs->modtime == 0) {
index 2dec6fdac1cb52e608c898a5ac779d9e557b0e9b..8cb2adfe864c3328ae95ebcf94b7df387cac8020 100644 (file)
 #include <algorithm>
 #include <set>
 #include <QDirIterator>
+#include <QTextCodec>
 #include "vio/csync_vio_local.h"
 #include "common/checksums.h"
 #include "csync_exclude.h"
 
+
 namespace OCC {
 
 Q_LOGGING_CATEGORY(lcDisco, "sync.discovery", QtInfoMsg)
@@ -97,7 +99,20 @@ void ProcessDirectoryJob::start()
         }
         while (auto dirent = csync_vio_local_readdir(dh)) {
             LocalInfo i;
-            i.name = QString::fromUtf8(dirent->path); // FIXME! conversion errors
+            static QTextCodec *codec = QTextCodec::codecForName("UTF-8");
+            ASSERT(codec);
+            QTextCodec::ConverterState state;
+            i.name = codec->toUnicode(dirent->path, dirent->path.size(), &state);
+            if (state.invalidChars > 0 || state.remainingChars > 0) {
+                _childIgnored = true;
+                auto item = SyncFileItemPtr::create();
+                item->_file = _currentFolder + i.name;
+                item->_instruction = CSYNC_INSTRUCTION_IGNORE;
+                item->_status = SyncFileItem::NormalError;
+                item->_errorString = tr("Filename encoding is not valid");
+                emit itemDiscovered(item);
+                continue;
+            }
             i.modtime = dirent->modtime;
             i.size = dirent->size;
             i.inode = dirent->inode;
@@ -194,6 +209,20 @@ bool ProcessDirectoryJob::handleExcluded(const QString &path, bool isDirectory,
         excluded = CSYNC_FILE_EXCLUDE_HIDDEN;
     }
 
+    auto localCodec = QTextCodec::codecForLocale();
+    if (localCodec->mibEnum() != 106) {
+        // If the locale codec is not UTF-8, we must check that the filename from the server can
+        // be encoded in the local file system.
+        //
+        // We cannot use QTextCodec::canEncode() since that can incorrectly return true, see
+        // https://bugreports.qt.io/browse/QTBUG-6925.
+        QTextEncoder encoder(localCodec, QTextCodec::ConvertInvalidToNull);
+        if (encoder.fromUnicode(path).contains('\0')) {
+            qCWarning(lcDisco) << "Cannot encode " << path << " to local encoding " << localCodec->name();
+            excluded = CSYNC_FILE_EXCLUDE_CANNOT_ENCODE;
+        }
+    }
+
     if (excluded == CSYNC_NOT_EXCLUDED /* FIXME && item->_type != ItemTypeSoftLink */) {
         return false;
     } else if (excluded == CSYNC_FILE_SILENTLY_EXCLUDED || excluded == CSYNC_FILE_EXCLUDE_AND_REMOVE) {
index 31a12dac62b0928082d0e89b15fb44c482f23882..0b26a3ae4ac44c0498d7900e6300e078577a1fc7 100644 (file)
@@ -387,28 +387,7 @@ int SyncEngine::treewalkFile(csync_file_stat_t * /*file*/, csync_file_stat_t * /
     // Decode utf8 path and rename_path QByteArrays to QStrings
     QString fileUtf8;
     QString renameTarget;
-    bool utf8DecodeError = false;
-    {
-        const auto toUnicode = [](QByteArray utf8, QString *result) {
-            static QTextCodec *codec = QTextCodec::codecForName("UTF-8");
-            ASSERT(codec);
-
-            QTextCodec::ConverterState state;
-            *result = codec->toUnicode(utf8, utf8.size(), &state);
-            return !(state.invalidChars > 0 || state.remainingChars > 0);
-        };
-
-        if (!toUnicode(file->path, &fileUtf8)) {
-            qCWarning(lcEngine) << "File ignored because of invalid utf-8 sequence: " << file->path;
-            instruction = CSYNC_INSTRUCTION_IGNORE;
-            utf8DecodeError = true;
-        }
-        if (!toUnicode(file->rename_path, &renameTarget)) {
-            qCWarning(lcEngine) << "File ignored because of invalid utf-8 sequence in the rename_path: " << file->path << file->rename_path;
-            instruction = CSYNC_INSTRUCTION_IGNORE;
-            utf8DecodeError = true;
-        }
-    }
+
 
     // key is the handle that the SyncFileItem will have in the map.
     QString key = fileUtf8;
@@ -513,11 +492,6 @@ int SyncEngine::treewalkFile(csync_file_stat_t * /*file*/, csync_file_stat_t * /
     }
 
 
-    if (item->_instruction == CSYNC_INSTRUCTION_IGNORE && utf8DecodeError) {
-        item->_status = SyncFileItem::NormalError;
-        //item->_instruction = CSYNC_INSTRUCTION_ERROR;
-        item->_errorString = tr("Filename encoding is not valid");
-    }
 
     bool isDirectory = file->type == ItemTypeDirectory;