From: Kunal Mehta <legoktm@debian.org>
Date: Thu, 12 Jan 2023 03:05:31 +0000 (+0000)
Subject: Import zimlib_8.1.0+really8.0.0.orig.tar.gz
X-Git-Tag: archive/raspbian/8.1.0+really8.0.0-1+rpi1^2~2
X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=82aa37e94f05a2d92230ef86e1fefd42640e5c5c;p=zimlib.git

Import zimlib_8.1.0+really8.0.0.orig.tar.gz

[dgit import orig zimlib_8.1.0+really8.0.0.orig.tar.gz]
---

82aa37e94f05a2d92230ef86e1fefd42640e5c5c
diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 0000000..21288b7
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,17 @@
+codecov:
+  notify:
+    require_ci_to_pass: yes  # YAML 1.1 boolean (true); NOTE(review): confirm Codecov honors this key under `notify`
+
+coverage:  # rules for the commit statuses Codecov reports
+  status:
+    project:
+      default:
+        threshold: 1%  # overall coverage may drop by up to 1% before the status fails
+    patch:
+      default:
+        target: 90%  # lines touched by the change must be at least 90% covered
+        threshold: 0%  # no tolerance below the patch target
+
+ignore:  # paths left out of the coverage report
+  - "test"
+  - "examples"
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
new file mode 100644
index 0000000..f39dc2a
--- /dev/null
+++ b/.github/FUNDING.yml
@@ -0,0 +1,12 @@
+# Funding platforms for this repository; keys with no value are unused (only `github` is set).
+
+github: kiwix # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
+patreon: # Replace with a single Patreon username
+open_collective: # Replace with a single Open Collective username
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+otechie: # Replace with a single Otechie username
+custom: # https://kiwix.org/support-us/ (the URL is commented out, so no custom link is set)
diff --git a/.github/script/build_libzim.cmd b/.github/script/build_libzim.cmd
new file mode 100644
index 0000000..aafa960
--- /dev/null
+++ b/.github/script/build_libzim.cmd
@@ -0,0 +1,10 @@
+call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
+rem vcvars64.bat (called above) prepares the Visual Studio 2019 build environment; use cl.exe for C and C++.
+set CC=cl.exe
+set CXX=cl.exe
+rem Configure a static build without Xapian in .\build; liblzma is forced to the Meson fallback and the zstd programs/tests/contrib are disabled.
+meson.exe setup build . --force-fallback-for liblzma -Ddefault_library=static -Dwith_xapian=false -Dzstd:bin_programs=false -Dzstd:bin_tests=false -Dzstd:bin_contrib=false -Dliblzma:default_library=static -Dliblzma:enable_xz=false
+rem Compile from inside the build directory.
+cd build
+
+ninja.exe
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..ee658f8
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,191 @@
+name: CI
+
+on: [push]
+
+jobs:
+  Macos:  # Builds libzim on macOS for one native and two iOS targets; tests run for the native target only.
+    strategy:
+      fail-fast: false  # keep the other matrix entries running when one fails
+      matrix:
+        target:
+          - native_dyn
+          - iOS_arm64
+          - iOS_x86_64
+    runs-on: macos-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v1
+      - name: Setup python 3.9
+        uses: actions/setup-python@v1
+        with:
+          python-version: '3.9'  # quoted so the version stays a string
+      - name: Install packages  # if `brew install` fails, `brew link --overwrite python@3.9` runs instead
+        run: |
+          brew update
+          brew install gcovr pkg-config ninja || brew link --overwrite python@3.9
+      - name: Install python modules  # meson is pinned to 0.52.1
+        run: pip3 install meson==0.52.1 pytest
+      - name: Install deps  # downloads the per-target dependency archive and unpacks it into $HOME
+        shell: bash
+        run: |
+          ARCHIVE_NAME=deps2_osx_${{matrix.target}}_libzim.tar.xz
+          wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C $HOME
+      - name: Compile  # shared library by default; non-native targets add bitcode, the cross file and static linkage
+        shell: bash
+        run: |
+          MESON_OPTION="--default-library=shared"
+          MESON_CROSSFILE="$HOME/BUILD_${{matrix.target}}/meson_cross_file.txt"
+          if [[ ! "${{matrix.target}}" =~ native_.* ]]; then
+            MESON_OPTION="$MESON_OPTION -Db_bitcode=true --cross-file $MESON_CROSSFILE -Dstatic-linkage=true"
+            cat $MESON_CROSSFILE
+          fi
+          export PKG_CONFIG_PATH=$HOME/BUILD_${{matrix.target}}/INSTALL/lib/pkgconfig
+          meson . build ${MESON_OPTION}
+          cd build
+          ninja
+      - name: Test  # NOTE(review): exports LD_LIBRARY_PATH; confirm it has an effect on macOS
+        if: startsWith(matrix.target, 'native_')
+        shell: bash
+        run: |
+          export LD_LIBRARY_PATH=$HOME/BUILD_${{matrix.target}}/INSTALL/lib:$HOME/BUILD_${{matrix.target}}/INSTALL/lib64
+          cd build
+          ninja download_test_data
+          meson test --verbose
+        env:
+          SKIP_BIG_MEMORY_TEST: 1  # presumably makes the test suite skip memory-heavy tests; confirm in the test sources
+
+  Windows:  # Builds libzim with MSVC via .github\script\build_libzim.cmd, then runs the test suite.
+    runs-on: windows-2019
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v1
+      - name: Setup python 3.10
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'  # quoted: an unquoted 3.10 would parse as the float 3.1
+      - name: Install packages
+        run:
+          choco install ninja
+      - name: Install python modules  # meson is not pinned here, unlike the Macos job
+        run: pip3 install meson
+      - name: Compile
+        shell: cmd
+        run: .github\script\build_libzim.cmd
+      - name: Test  # runs in the `build` directory created by the compile script
+        shell: cmd
+        run: |
+          cd build
+          ninja download_test_data
+          meson test --verbose
+
+  Linux:  # Builds every target, with and without Xapian, inside a kiwix-build CI container; native targets also publish coverage.
+    strategy:
+      fail-fast: false  # keep the other matrix entries running when one fails
+      matrix:
+        target:
+          - native_static
+          - native_dyn
+          - alpine_dyn
+          - android_arm
+          - android_arm64
+          - win32_static
+          - win32_dyn
+        with_xapian:  # passed to meson as -Dwith_xapian in the Compile step
+          - true
+          - false
+        include:  # per-target extras: container image variant and library directory suffix
+          - target: native_static
+            image_variant: bionic
+            lib_postfix: '/x86_64-linux-gnu'
+          - target: native_dyn
+            image_variant: bionic
+            lib_postfix: '/x86_64-linux-gnu'
+          - target: android_arm
+            image_variant: bionic
+            lib_postfix: '/arm-linux-androideabi'
+          - target: android_arm64
+            image_variant: bionic
+            lib_postfix: '/aarch64-linux-android'
+          - target: alpine_dyn
+            image_variant: alpine
+            lib_postfix: '/x86_64-linux-musl'
+          - target: win32_static
+            image_variant: f35
+            lib_postfix: '64'
+          - target: win32_dyn
+            image_variant: f35
+            lib_postfix: '64'
+    env:
+      HOME: /home/runner  # later steps also hard-code /home/runner paths
+    runs-on: ubuntu-latest
+    container:
+      image: "kiwix/kiwix-build_ci:${{matrix.image_variant}}-32"
+    steps:
+    - name: Checkout code  # shallow git clone of the pushed branch/tag into $HOME, driven from a Python script
+      shell: python
+      run: |
+        from subprocess import check_call
+        from os import environ
+        config_command = [
+          'git', 'config', '--global',
+          'http.postBuffer', '1048576000'
+        ]
+        check_call(config_command, cwd=environ['HOME'])
+        clone_command = [
+          'git', 'clone',
+          'https://github.com/${{github.repository}}',
+          '--depth=1',
+          '--branch', '${{github.ref_name}}'
+        ]
+        check_call(clone_command, cwd=environ['HOME'])
+    - name: Install deps  # skipped for alpine targets; NOTE(review): OS_NAME is not set in this workflow and the download is plain HTTP
+      if: false == startsWith(matrix.target, 'alpine_')
+      shell: bash
+      run: |
+        ARCHIVE_NAME=deps2_${OS_NAME}_${{matrix.target}}_libzim.tar.xz
+        wget -O- http://tmp.kiwix.org/ci/${ARCHIVE_NAME} | tar -xJ -C /home/runner
+    - name: Compile  # NOTE(review): `cd $HOME/libzim` assumes the cloned repository is named libzim
+      shell: bash
+      run: |
+        if [[ "${{matrix.target}}" =~ .*_dyn ]]; then
+          MESON_OPTION="--default-library=shared"
+        else
+          MESON_OPTION="--default-library=static"
+        fi
+        if [[ "${{matrix.target}}" =~ native_.* ]]; then
+          MESON_OPTION="$MESON_OPTION -Db_coverage=true"
+        elif [[ "${{matrix.target}}" != alpine_* ]]; then
+          MESON_OPTION="$MESON_OPTION --cross-file $HOME/BUILD_${{matrix.target}}/meson_cross_file.txt"
+        fi
+        if [[ "${{matrix.target}}" =~ android_.* ]]; then
+          MESON_OPTION="$MESON_OPTION -Dstatic-linkage=true -DUSE_BUFFER_HEADER=false"
+        fi
+        cd $HOME/libzim
+        meson . build ${MESON_OPTION} -Dwith_xapian=${{matrix.with_xapian}}
+        cd build
+        ninja
+      env:
+        PKG_CONFIG_PATH: "/home/runner/BUILD_${{matrix.target}}/INSTALL/lib/pkgconfig:/home/runner/BUILD_${{matrix.target}}/INSTALL/lib${{matrix.lib_postfix}}/pkgconfig"
+    - name: Test  # native and alpine targets only; `ninja coverage` runs for native targets only
+      if: startsWith(matrix.target, 'native_') || startsWith(matrix.target, 'alpine_')
+      shell: bash
+      run: |
+        cd $HOME/libzim/build
+        ninja download_test_data
+        meson test --verbose
+        if [[ "${{matrix.target}}" =~ native_.* ]]; then
+          ninja coverage
+        fi
+      env:
+        LD_LIBRARY_PATH: "/home/runner/BUILD_${{matrix.target}}/INSTALL/lib:/home/runner/BUILD_${{matrix.target}}/INSTALL/lib${{matrix.lib_postfix}}"
+        SKIP_BIG_MEMORY_TEST: 1  # presumably makes the test suite skip memory-heavy tests; confirm in the test sources
+    - name: Publish coverage  # native targets only; downloads the Codecov bash uploader, runs it, then deletes it
+      shell: bash
+      run: |
+        cd $HOME/libzim
+        curl https://codecov.io/bash -o codecov.sh
+        bash codecov.sh -n "${OS_NAME}_${{matrix.target}}" -Z
+        rm codecov.sh
+      if: startsWith(matrix.target, 'native_')
+      env:
+        CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml
new file mode 100644
index 0000000..208aa3f
--- /dev/null
+++ b/.github/workflows/package.yml
@@ -0,0 +1,114 @@
+name: Packages
+on: [push, pull_request]
+
+jobs:
+  build-deb:  # Builds Debian packages for every distro in the matrix; Ubuntu builds are uploaded to a Launchpad PPA.
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false  # keep the other distros building when one fails
+      matrix:
+        distro:
+          - debian-unstable
+          - debian-bullseye
+          - debian-buster
+          - ubuntu-kinetic
+          - ubuntu-jammy
+          - ubuntu-focal
+          - ubuntu-bionic
+    steps:
+      - uses: actions/checkout@v2
+
+      # Determine which PPA we should upload to: tag refs use the release PPA, everything else the dev PPA
+      - name: PPA
+        id: ppa  # later steps read the result as steps.ppa.outputs.ppa
+        run: |
+          if [[ $REF == refs/tags* ]]
+          then
+            echo "ppa=kiwixteam/release" >> "$GITHUB_OUTPUT"
+          else
+            echo "ppa=kiwixteam/dev" >> "$GITHUB_OUTPUT"
+          fi
+        env:
+          REF: ${{ github.ref }}
+
+      - uses: legoktm/gh-action-auto-dch@master
+        with:
+          fullname: Kiwix builder
+          email: release+launchpad@kiwix.org
+          distro: ${{ matrix.distro }}
+
+      - uses: legoktm/gh-action-build-deb@debian-unstable  # only the build step whose `if` matches the matrix distro runs
+        if: matrix.distro == 'debian-unstable'
+        name: Build package for debian-unstable
+        id: build-debian-unstable
+        with:
+          args: --no-sign
+
+      - uses: legoktm/gh-action-build-deb@debian-bullseye
+        if: matrix.distro == 'debian-bullseye'
+        name: Build package for debian-bullseye
+        id: build-debian-bullseye
+        with:
+          args: --no-sign
+
+      - uses: legoktm/gh-action-build-deb@debian-buster
+        if: matrix.distro == 'debian-buster'
+        name: Build package for debian-buster
+        id: build-debian-buster
+        with:
+          args: --no-sign
+
+      - uses: legoktm/gh-action-build-deb@ubuntu-kinetic  # Ubuntu builds additionally receive the PPA chosen above
+        if: matrix.distro == 'ubuntu-kinetic'
+        name: Build package for ubuntu-kinetic
+        id: build-ubuntu-kinetic
+        with:
+          args: --no-sign
+          ppa: ${{ steps.ppa.outputs.ppa }}
+
+      - uses: legoktm/gh-action-build-deb@ubuntu-jammy
+        if: matrix.distro == 'ubuntu-jammy'
+        name: Build package for ubuntu-jammy
+        id: build-ubuntu-jammy
+        with:
+          args: --no-sign
+          ppa: ${{ steps.ppa.outputs.ppa }}
+
+      - uses: legoktm/gh-action-build-deb@ubuntu-focal
+        if: matrix.distro == 'ubuntu-focal'
+        name: Build package for ubuntu-focal
+        id: build-ubuntu-focal
+        with:
+          args: --no-sign
+          ppa: ${{ steps.ppa.outputs.ppa }}
+
+      - uses: legoktm/gh-action-build-deb@ubuntu-bionic
+        if: matrix.distro == 'ubuntu-bionic'
+        name: Build package for ubuntu-bionic
+        id: build-ubuntu-bionic
+        with:
+          args: --no-sign
+          ppa: ${{ steps.ppa.outputs.ppa }}
+
+      - uses: actions/upload-artifact@v2  # publishes the `output` directory as a per-distro workflow artifact
+        with:
+          name: Packages for ${{ matrix.distro }}
+          path: output
+
+      - uses: legoktm/gh-action-dput@master
+        name: Upload dev package
+        # Only upload on pushes to master (Ubuntu distros only)
+        if: github.event_name == 'push' && github.event.ref == 'refs/heads/master' && startsWith(matrix.distro, 'ubuntu-')
+        with:
+          gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
+          repository: ppa:kiwixteam/dev
+          packages: output/*_source.changes
+
+      - uses: legoktm/gh-action-dput@master
+        name: Upload release package
+        # Only upload on pushes of a tag (Ubuntu distros only)
+        if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') && startsWith(matrix.distro, 'ubuntu-')
+        with:
+          gpg_key: ${{ secrets.LAUNCHPAD_GPG }}
+          repository: ppa:kiwixteam/release
+          packages: output/*_source.changes
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d319a8f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,35 @@
+*~
+*#*
+autom4te.cache
+build
+compile
+config.h
+configure
+depcomp
+.deps
+.dirstamp
+INSTALL
+install-sh
+*.kate-swp
+*.la
+.libs
+libtool
+*.lo
+ltmain.sh
+*.m4
+Makefile
+Makefile.in
+missing
+*.o
+stamp-h1
+.svn
+.*.swp
+*.zim
+examples/createZimExample
+src/tools/zimdump
+src/tools/zimsearch
+libzim.pc
+test-driver
+test/zimlib-test*
+test/test-suite.log
+.clangd
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..2b31fc1
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,17 @@
+# This is the list of Libzim's significant contributors.
+#
+# This does not necessarily list everyone who has contributed code,
+# especially since many employees of one corporation may be contributing.
+# To see the full list of contributors, see the revision history in
+# source control.
+
+C. Scott Ananian <cscott@cscott.net> https://github.com/cscott
+Dmitry Atamanov <data-man@users.noreply.github.com> https://github.com/data-man
+Emmanuel Engelhart <kelson@kiwix.org> https://github.com/kelson42
+Kunal Mehta <legoktm@debian.org> https://github.com/legoktm
+Maneeshpm <manu.pm55@gmail.com> https://github.com/maneeshpm
+Matthieu Gautier <mgautier@kymeria.fr> https://github.com/mgautierfr
+MiguelRocha <miguelrocha@ua.pt> https://github.com/miguelrocha
+Renaud Gaudin <reg@kiwix.org> https://github.com/rgaudin
+Tommi Mäkitalo <tommi@tntnet.org> https://github.com/maekitalo
+Veloman Yunkan <veloman.yunkan@gmail.com> https://github.com/veloman-yunkan
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..e2683b5
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,280 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..08fa831
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,547 @@
+libzim 8.0.0
+============
+
+ * [API-BREAK] Remove lzma compression support in writer (@veloman-yunkan #718)
+ * Add new method `zim::Entry::getRedirectEntryIndex()` (@veloman-yunkan #716)
+ * Add new helper function `zim::setICUDataDirectory()` to help android wrapper
+   compilation (@mgautierfr #722)
+ * Fix `std::call_once` usage (alpine bug) (@veloman-yunkan #708)
+ * Better xapian indexation (no transaction, better compact algorithm) (@mgautierfr #719)
+ * Reserve more space (1968B instead of 944B) for mimetype list (@mgautierfr #720)
+ * [CI] Fix android compilation in the CI (@veloman-yunkan @mgautierfr #713)
+ * [CI] Add CI for Alpine (@veloman-yunkan #710)
+ * [CI] Support checkout of tag in the CI (@teeks99 #696)
+ * [CI] Remove movebot (@kelson42 #704)
+ * [CI] Remove Impish and add Kinetic packages (@legoktm #715)
+ * Fix code factor report (@kelson42 #700)
+ * Fix readme (@kelson42 #701 #716)
+
+libzim 7.2.2
+============
+
+ * Change the way we generate search result snippet.
+   We now ask xapian to generate "less" relevant snippet (even if in practice,
+   snippets are still good). But it now generates snippets far more quickly.
+   On cold search, no cache and low IO, search can go from 90s to 3s.
+   (@mgautier #697)
+ * [CI] Update base images (@mgautier #695)
+
+libzim 7.2.1
+============
+
+ * Make suggestions diacritics insensitive (@veloman-yunkan #691)
+ * [Writer] Raise an exception when user add a invalid entry (duplicate path)
+   instead of printing a message (which can be too easily missed) and be buggy
+   (@mgautierfr #690)
+ * [Writer] Do not `hasIndexData` and `getTitle` in the main thread when we add
+   an entry (@mgautier #684)
+ * [Writer] Properly clean and stop the writer even if user hasn't call
+   `finishZimCreation` (The created zim file is still invalid) (@veloman-yunkan #666)
+ * Add a default argument value for mimetype of `creator::addMetadata`
+   (@kelvinhammond #678)
+ * Use a more informative message in exception when we cannot open a file
+   (@veloman-yunkan #667 #668)
+ * Use a generic dirent lookup to search by title (@veloman-yunkan #651)
+ * Various improvements:
+   - CI, Packaging : Stop creating packages for Ubuntu Hirsute (@legoktm #664)
+   - Update Readme (@TheDuchy #660)
+   - Fix cross-compilation host machine detection (@kelson42 #665)
+   - Fix macos/ios compilation (@mgautierfr #672)
+   - Update documentation @mgautierfr #677, @veloman-yunkan #682
+
+
+libzim 7.2.0
+============
+
+ * Add methods to get/print (dependences) versions (@kelson42, #452)
+ * Fix Emscripten compilation (@kelson42, @mossroy, #643)
+
+libzim 7.1.0
+============
+
+ * Fix dirent test on 32 bits architectures (@mgautierfr #632)
+ * Fix compilation on Alpine - with musl (@amirouche #649)
+ * Don't crash if ZIM without illustration nor X/W namespace (@mgautierfr #641)
+ * Switch default suggestion operator to AND (@maneeshpm #644)
+ * Add a new method Archive::getMetadataItem (@mgautierfr #639)
+ * Better indexion criterias (@mgautierfr #642)
+ * Avoid duplicated archives in the searcher (@veloman-yunkan #648)
+ * Fix random entry (@veloman-yunkan #650)
+ * Various improvements.
+   - CI @mgautierfr #640, @kelson42 #638, @legoktm #654
+   - Doc @rgaudin #646
+
+libzim 7.0.0
+============
+
+Version 7.0.0 is a major release.
+
+The API has been completely rewritten.
+Most notable change is that namespaces are now hidden.
+The new API is described in documentation, which includes a Transition Guide from v6.
+
+ZIM files created with it uses new ZIM minor version (6.1 - see Header section of spec.)
+Both backward and forward compatibility is kept.
+
+Improvements
+------------
+
+ * Rewrite creator and reader API
+   This removes the namespace from the API. Article are automatically put in
+   the right namespace ('A') and the retrieval of content is made using
+   specific API. (@mgautier #454)
+ * Better handling of the conditional compilation without xapian.
+   Before that, the search API was present (but returning empty result) if
+   libzim was compiled without xapian. Now the API is not present anymore.
+   User code must check if libzim is compiled with xapian or not by checking
+   if LIBZIM_WITH_XAPIAN is defined or not. (@mgautierfr #465)
+ * Add a new specific listing in zim files to list entries considered as "front
+   article". At creation, wrapper MUST pass the hint `FRONT_ARTICLE` to
+   correctly mark the entry. Search by title uses this list if present.
+   (@mgautierfr #487)
+ * Store the wellknown entries in the `W` namespace (`W/mainPage`)
+   (@mgautierfr #497)
+ * Rewrite Search API. Fix potential memory leak and allow correct reusing of
+   create search. (@mgautierfr #530)
+ * New suggestion search API. The api mimics the Search API but specialized
+   for suggestion (@maneeshpm #574)
+ * Add `zim::Archive` constructors to open an archive using a existing file descriptor.
+   This API is not available on Windows. (@veloman-yunkan #449)
+ * Make zstd the default compression algorithm (@veloman-yunkan #480)
+ * The method `zim::Archive::checkIntegrity` now checks if the mimetypes indicated in the
+   dirents are correct (@veloman-yunkan #505)
+ * Writer doesn't add a `.zim` extension to the given path. (@maneeshpm #503)
+ * Implement random entry picking. We are choosing a entry from the "front
+   article" list if present. (@mgautierfr #476)
+ * Creator now create the `M/Counter` metadata. (@mgautierfr)
+ * Better Illustration handling. Favicon is replaced by Illustration.
+   Illustration can now have different size and scale (even if the API do
+   not use this feature) (@mgautierfr #540)
+ * Search iterator now have a method `getZimId` to know the Id of the zim
+   corresponding to the result (useful for multizim search) (@maneeshpm #557)
+
+Bug fixes
+---------
+
+ * The method `zim::Archive::checkIntegrity` now check if the dirents are
+   correctly sorted. (@veloman-yunkan #448)
+ * Handle large MIME-type list. Some zim file may have a pretty large mimetype
+   list. (@veloman-yunkan #460)
+ * Fix handling of zim file containing item of size 0. (@mgautierfr #483)
+ * Better parsing of the entry paths to detect the namespace (@maneeshpm #479)
+ * Fix zim file creation on Windows (@mgautierfr #508)
+ * Better algorithm tunning for suggestion search (@maneeshpm #492)
+ * The default indexer now index html content only. (@mgautierfr #511)
+ * Better suggestion search : Don't use stopwords, use OP_PHRASE
+   (@maneeshpm #501)
+ * Remove duplicate in the suggestion search (@maneeshpm #515)
+ * Remove the termlist from the xapian database, lower memory usage
+   (@maneeshpm #528)
+ * Add a anchor in the suggestion search to search term at the beginning of
+   the title (@maneeshpm #526)
+ * Make the suggestion search working with special characters (`&`, `+`)
+   (@veloman-yunkan #534)
+ * Fix creator issue not detecting that cluster must be extended if it
+   contains only 32-bit-sized content. (@veloman-yunkan #552)
+ * Correctly generate suggestion snippet. (@maneeshpm #545)
+ * Better cluster size configuration (@mgautierfr #555)
+ * Make search iterator `getTitle` return the real title of the entry and not
+   the one stored in the xapian database (caseless) (@maneeshpm #586)
+ * Correctly close a zim creator to avoid a crash when the creator is
+   destructed without being started (@mgautierfr #613)
+ * Reduce the creator memory usage by reducing the memory size of the dirent
+   (@mgautier #616, #628)
+ * Write the cluster using a bigger chunk size for performance
+   (@mgautierfr #506)
+ * Change the default cluster size to 2MiB (@mgautierfr #585)
+ * The default mimetype for metadata now include the utf8 chardet
+   (@rgaudin #626)
+ * Improve the estimation of the number of search/suggestion results by forcing
+   Xapian to evaluate at least 10 results (@mgautier #625)
+
+Other
+-----
+
+ * Update xapian stopwords list. (@data-man #447)
+ * Remove direct pthread dependency (use c++11 thread library). (@mgautierfr #443)
+   We still need pthread library on linux and freebsd as C++11 is using it internally.
+ * [CI] Make the libzim CI compile libzim natively on Windows (@mgautierfr #453).
+ * [CI] Build libzim package for Ubuntu Hirsute and Impish
+   (@legoktm #459, #580)
+ * Always create zim file using the major version 6. (@mgautierfr #512)
+ * Move the test data files out of the git repository. Now test files are
+   stored in `zim-testing-suite` repository and must be downloaded.
+   (@mgautierfr #538, #535)
+ * Add search iterator unit test (@maneeshpm #547)
+ * Correctly fix search iterator method case to use camelCase everywhere
+   (@maneeshpm #563)
+ * Add a cast to string operator on Uuid (@maneeshpm #582)
+ * Make unittest print the path of the missing zim file when something goes
+   wrong (@kelson42 #601)
+ * Delete temporary data (index) after we called `finishZimCreation` instead of
+   waiting for creator destruction. (@mgautierfr #603)
+ * Add basic user documentation (@mgautierfr #611)
+
+Known bugs
+----------
+
+Suggestion system using in current libkiwix doesn't work with new zim files
+created with this release (and future ones).
+New libkiwix version will be fixed and will work with new and old zim files.
+
+
+libzim 6.3.2
+============
+
+This is a hotfix of 6.3.0 :
+ * libzim now create zimfile with zstd compression 19 instead of 22.
+   So new libzim do not need to allocate 128Mb per cluster at decompression
+   time.
+ * At reading time, on 32 bits architectures, zstd cluster are not keep in
+   cache. This avoid use to also keep the decompression stream which reserve
+   128Mb of memory address.
+
+libzim 6.3.1
+============
+
+The release process of 6.3.1 was buggy. So, no 6.3.1.
+
+
+libzim 6.3.0
+============
+
+ * Rewrite internal reader structure to use stream decompression.
+   This allows libzim to not decompress the whole cluster to get an article
+   content. This is big performance improvement, it speedups random access by
+   2, with a very small cost when doing "full" incremental reading
+   (zim-check/zim-dump). (@veloman-yunkan)
+ * Better dirent lookup.
+   Dirent lookup is the process of locating article data starting from the url
+   or title. This improves reading of zim file up to 10% (@veloman-yunkan)
+ * Add basic, first version of `validate` function to check internal structure
+   of a zim file. (@veloman-yunkan, @MiguelRocha)
+ * Fix compilation of libzim without xapian (@mgautierfr)
+ * Remove zlib dependency (and support of very old files created using zlib
+   compression) (@mgautierfr)
+ * New unit tests and various small fixes.
+
+
+libzim 6.2.2
+============
+
+ * Check blob index before access it in the cluster.
+ * Refactoring of the cluster reading.
+
+libzim 6.2.1 (release process broken)
+=====================================
+
+ * Update readme and add link to repology.org packages list.
+ * Fix compilation on windows.
+
+libzim 6.2.0
+============
+
+ * Fix compilation of libzim on freebsd.
+ * Rewrite unit tests to remove python based test and use gtest all the time.
+ * Make libzstd mandatory.
+ * Support for meson 0.45.
+ * Fix multipart support on macos.
+ * Add a documentation system.
+ * Better cache system implementation (huge speed up).
+ * Various (and numerous) small refactoring.
+
+
+libzim 6.1.8
+============
+
+ * Increase default timeout for test to 120 seconds/test
+ * Compression algorithm to use can be passed to `zim::writer::Creator`
+ * Add automatic debian packaging of libzim.
+ * Fix using of tmpdir (and now use env var TMPDIR) during tests.
+
+
+libzim 6.1.7
+============
+
+ * Do not assume urlPtrPos is just after the mimetype list.
+ * Fix compilation of compression test.
+ * Do not exit but throw an exception if an ASSERT is not fulfilled.
+
+libzim 6.1.6
+============
+
+ * Better (faster) implementation of the ordering of article by cluster.
+ * Fix compression algorithm.
+
+libzim 6.1.5
+============
+
+ * [Writer] Remove unused declaration of classes.
+   Those classes were not implemented nor used at all.
+
+libzim 6.1.4
+============
+
+ * [Writer] Fix excessive memory usage. Data of the cluster were clean at the
+   end of the process, not once we don't need it.
+
+libzim 6.1.3
+============
+
+ * [Writer] Use a `.tmp` suffix and rename to `.zim` at the end of the write
+ process.
+ * Add unit tests
+ * Do not include unnecessary `windows.h` headers in public zim's headers.
+
+libzim 6.1.2
+============
+
+ * [CI] Fix codecov configuration
+ * [Writer] Fix threads synchronization at end of writing process.
+
+libzim 6.1.1
+============
+
+ * Fix bug around the find function
+
+libzim 6.1.0
+============
+
+ * Compile now on OpenBSD
+ * [Test] Use the main function provided by gtest.
+ * [CI] Move the CI compilation to github actions.
+ * Add stopwords for 54 new languages.
+ * [Writer] Improve the way we are writing cluster at zim creation time.
+   - Clusters are directly written in the zim file instead of using temporary
+     files.
+   - mimetypes are limited to 944 bytes.
+ * Add a new type of iterator to iterate over articles in a performant way
+   reducing decompression of clusters. This is now the new default iterator.
+ * Add support for zim files compressed with zstd compression algorithm.
+   This is not possible to use zstd to create zim file for now.
+
+libzim 6.0.2
+============
+
+ * Fix search suggestion parsing.
+
+libzim 6.0.1
+============
+
+ * Fix crash when trying to open an empty file.
+ * Ensure that pytest tests are run on the CI.
+
+libzim 6.0.0
+============
+
+ * [Writer] Index the articles in differents threads. This is a huge speed
+   improvement as the main thread in not blocked by indexing.
+ * Index the title only if `shouldIndex` return true.
+
+libzim 5.1.0
+============
+
+ * Improve indexation of the title.
+ * Better pertinence of suggestions (only for new zim files)
+ * Improvement of the speed of Leveinstein distance for suggestions (for old
+   zims)
+
+libzim 5.0.2
+============
+
+ * Improve README.
+ * Remove gtest as embeded subproject.
+ * Better lzma compression.
+ * Better performance of the leveinstein algorithm (better suggestions
+   performance)
+
+libzim 5.0.1
+============
+
+ * Update README.
+ * [Writer] Add debug information (print progress of the clusters writing).
+ * [Writer] Correctly print the url to the user.
+ * [CI] Add code coverage.
+
+libzim 5.0.0
+============
+
+ * Fix thread slipping for win32 crosscompilation.
+ * Fix a potential invalid access when reading dirent.
+ * Fix memory leak in the decompression algorithm.
+ * [Writer] Fix a memory leak (cluster cleanning)
+ * [Writer] Write article data in a temporary cluster file instead of a
+   temporary file per article.
+ * [Writer] Better algorithm to store the dirent while creating the zim
+   file. Better memory usage.
+ * [Writer] [API Change] Url/Ns are now handle using the same struct Url.
+ * [Writer] [API Change] No more aid and redirectAid. A redirectArticle
+   have to implement redirectUrl.
+ * [Writer] Use a memory pool to avoid multiple small memory allocations.
+ * [Writer] [API Change] Rename `ZimCreator` to `Creator`.
+ * [API Change] File's `search` and `suggestions` now return a unique_ptr
+   instead of a raw pointer.
+
+libzim 4.0.7
+============
+
+ * Build libzim without rpath.
+
+libzim 4.0.6
+============
+
+ * Support zim file created with cluster not written sequentially.
+ * Remove a meson warning.
+
+libzim 4.0.5
+============
+
+ * Store the xapian database in the right url.
+ * Do not fail when reading very small zim file (<256b).
+ * Do not print message on normal behavior.
+ * [BUILDSYSTEM] Be able to build a dynamic lib (libzim.so) but using static
+   dependencies.
+ * [CI] Use last version of meson.
+ * [CI] Use the new deps archive xz
+
+libzim 4.0.4
+============
+
+ * Fix opening of multi-part zim.
+ * Fix conversion of path to wpath on Windows.
+
+libzim 4.0.3
+============
+
+ * Implement low level file manipulation using different backends
+
+libzim 4.0.2
+============
+
+ * [Windows] Fix opening of zim file bigger than 4GiB
+
+libzim 4.0.1
+============
+
+ * [Writer] Fix wrong redirection log message
+ * Make libzim compile natively on windows using MSVC
+ * Better message when failing to read a zim file.
+ * Make libzim on windows correctly open unicode path.
+ * Add compilation option to use less memory (but more I/O).
+   Usefull on low memory devices (android)
+ * Small fixes
+
+libzim 4.0.0
+============
+
+ * [Writer] Remove a lot of memory copy.
+ * [Writer] Add xapian indexing directly in libzim.
+ * [Writer] Better API.
+ * [Writer] Use multi-threading to write clusters.
+ * [Writer] Ensure mimetype of articles article is not null.
+ * Extend test timeout for cluster's test.
+ * Less memory copy for cluster's test.
+ * Allow skipping test using a lot memory using env variable
+   `SKIP_BIG_MEMORY_TEST=1`
+ * Explicitly use the icu namespace to allow using of packaged icu lib.
+ * Use a temporary file name as long as the ZIM writting process is
+ not finished (#163)
+ * [Travis] Do no compile using gcc-5 (but the default trusty's one 4.8)
+
+libzim 3.3.0
+============
+
+ * Fix handling of big cluster (>4GiB) on 32 bits architecture. This is mainly
+ done by :
+   * Do not mmap the whole cluster by default.
+   * MMap only the memory asociated to an article.
+   * If an article is > 4GiB, the blob associated to it is invalid
+     (data==size==0).
+   * Other information are still valid (directAccessInformation, ...)
+ * Fix writing of extended cluster in writer.
+ * Compile libzim on macos.
+ * Build libzim setting RPATH.
+ * Search result urls are now what is stored in the zim file. They should not
+   start with a `/`. This is a revert of the change made in last release.
+   (See kiwix/kiwix-lib#123)
+ * Spelling corrections in README.
+
+libzim 3.2.0
+============
+
+ * Support geo query if the xapian database has indexed localisation.
+ * Handle articles bigger than 4Go in the zim file (#110).
+ * Use AND operator between search term.
+ * Fix compilation with recent clang (#95).
+ * Add method to get article's data localisation in the zim file.
+ * Be able to get only a part of article (#77).
+ * Do not crash if we cannot open the xapian Database for some reasons.
+   (kiwix/kiwix-tools#153)
+ * Do not assume there is always a checksum in the zim file.
+   (kiwix/kiwix-tools#150)
+ * Try to do some sanity checks when opening a zim file.
+ * Use pytest to do some tests (when cython is available).
+ * Use levenshtein distance to sort and have better suggestion results.
+ * Search result urls are now always absolute (starts with a '/').
+   (kiwix/kiwix-lib#110)
+ * Open the file readonly when checking the zim file (and so be able to check
+   read only file).
+ * Accept absolute url starting with '/' when searching for article.
+ * Fix various bugs
+
+libzim 3.1.0
+============
+
+ * Lzma is not a optional dependency anymore.
+ * Better handle (report and not crash) invalid zim file.
+ * Embed source of gtest (used only if gtest is not available on the system)
+ * Move zimDump tools out of libzim repository to zim-tools
+ * ZimCreator tools doesn't not read command line to set options.
+
+libzim 3.0.0
+============
+
+This is a major change of the libzim.
+Expect a lot new improvement and API changes.
+
+ * Add a suggestion mode to the search
+ * Fix licensing issues
+ * Fix wrong stemming of the query when searching
+ * Deactivate searching (and so crash) in the embedded database if the zim is
+   splitted
+ * Rewrite the low level memory management of libzim when reading a zim file:
+    * We use a buffer base entity to handle memory and reading file instead of
+      reading file using stream.
+    * MMap the memory when possible to avoid memory copy.
+    * Use const when possible (API break)
+ * Move to googletest instead of cxxtools for unit-tests.
+ * Fix endianness bug on arm.
+ * Do not install private headers. Those headers declare private structure and
+   should not be visible (API break)
+ * Compile libzim with `-Werror` and `-Wall` options.
+ * Make libzim thread safe for reading article.
+   The search part is not thread safe, and all search operation must be
+   protected by a lock.
+ * Add method to get only a part of a article.
+ * Move some tools to zim-tools repository.
+
+
+libzim 2.0.0
+============
+
+ * Move to meson build system
+   `libzim` now use `meson` as build system instead of `autotools`
+ * Move to C++11 standard.
+ * Fulltext search in zim file.
+   We have integrated the xapian fulltext search in libzim.
+   So now, libzim provide an API to search in a zim containing embeded fulltext
+   index. This means that :
+    * libzim needs xapian as an (optional) dependency (if you want to compile with
+     xapian support).
+    * The old and unused search API has been removed.
+ * Remove bzip2 support.
+ * Remove Symbian support.
+ * Few API changes
+   * Make some header files private (not installed);
+   * A `Blob` can now be cast to a `string` directly;
+   * Change a lot of `File` methods to const methods.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..da103c9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,213 @@
+Libzim
+======
+
+The Libzim is the reference implementation for the [ZIM file
+format](https://wiki.openzim.org/wiki/ZIM_file_format). It's a [software
+library](https://en.wikipedia.org/wiki/Library_(computing)) to read
+and write ZIM files on many systems and architectures. More
+information about the ZIM format and the openZIM project at
+https://openzim.org/.
+
+[![Release](https://img.shields.io/github/v/tag/openzim/libzim?label=release&sort=semver)](https://download.openzim.org/release/libzim/)
+[![Repositories](https://img.shields.io/repology/repositories/libzim?label=repositories)](https://github.com/openzim/libzim/wiki/Repology)
+[![License](https://img.shields.io/badge/License-GPL%20v2-blue.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html)
+[![Build](https://github.com/openzim/libzim/workflows/CI/badge.svg?query=branch%3Amaster)](https://github.com/openzim/libzim/actions?query=branch%3Amaster)
+[![Doc](https://readthedocs.org/projects/libzim/badge/?style=flat)](https://libzim.readthedocs.io/en/latest/?badge=latest)
+[![Codecov](https://codecov.io/gh/openzim/libzim/branch/master/graph/badge.svg)](https://codecov.io/gh/openzim/libzim)
+[![CodeFactor](https://www.codefactor.io/repository/github/openzim/libzim/badge)](https://www.codefactor.io/repository/github/openzim/libzim)
+
+Disclaimer
+----------
+
+This document assumes you have a little knowledge about software
+compilation. If you experience difficulties with the dependencies or
+with the Libzim compilation itself, we recommend to have a look to
+[kiwix-build](https://github.com/kiwix/kiwix-build).
+
+Preamble
+--------
+
+Although the Libzim can be compiled/cross-compiled on/for many
+systems, the following documentation explains how to do it on POSIX
+ones. It is primarily intended for GNU/Linux systems and has been tested
+on recent releases of Ubuntu and Fedora.
+
+Dependencies
+------------
+
+The Libzim relies on many third party software libraries. They are
+prerequisites to the Libzim compilation. Following libraries
+need to be available:
+* [LZMA](https://tukaani.org/lzma/) (package `liblzma-dev` on Ubuntu)
+* [ICU](http://site.icu-project.org/) (package `libicu-dev` on Ubuntu)
+* [Zstd](https://facebook.github.io/zstd/) (package `libzstd-dev` on Ubuntu)
+* [Xapian](https://xapian.org/) - optional (package `libxapian-dev` on Ubuntu)
+
+To test the code:
+* [Google Test](https://github.com/google/googletest) (package `googletest` on Ubuntu)
+* [ZIM Testing Suite](https://github.com/openzim/zim-testing-suite) - Reference test data set
+
+To build the documentations you need the packages:
+* [Doxygen](https://www.doxygen.nl)
+* Python packages for [Sphinx](https://www.sphinx-doc.org), [Sphinx rtd theme](https://github.com/readthedocs/sphinx_rtd_theme), [Breathe](https://breathe.readthedocs.io) and [Exhale](https://exhale.readthedocs.io) (packages `Sphinx`, `sphinx_rtd_theme`, `Breathe` and `Exhale` while using pip)
+
+These dependencies may or may not be packaged by your operating
+system. They may also be packaged but only in an older version. The
+compilation script will tell you if one of them is missing or too old.
+In the worse case, you will have to download and compile a more recent
+version by hand.
+
+If you want to install these dependencies locally, then ensure that
+Meson (through `pkg-config`) will properly find them.
+
+Environment
+-------------
+
+The Libzim builds using [Meson](https://mesonbuild.com/) version
+0.43 or higher. Meson relies itself on Ninja, Pkg-config and few other
+compilation tools. Install them first:
+* Meson
+* Ninja
+* Pkg-config
+
+These tools should be packaged if you use a cutting edge operating
+system. If not, have a look to the [Troubleshooting](#Troubleshooting)
+section.
+
+Compilation
+-----------
+
+Once all dependencies are installed, you can compile Libzim with:
+```bash
+meson . build
+ninja -C build
+```
+
+By default, it will compile dynamic linked libraries. All binary files
+will be created in the `build` directory created automatically by
+Meson. If you want statically linked libraries, you can add
+`--default-library=static` option to the Meson command.
+
+If you want to build the documentation, we need to pass the
+`-Ddoc=true` option and run the `doc` target:
+```bash
+meson . build -Ddoc=true
+ninja -C build doc
+```
+
+Depending on your system, `ninja` command may be called `ninja-build`.
+
+By default, Libzim tries to compile with Xapian (and will generate an
+error if Xapian is not found).  You can build without Xapian by
+passing the option `-Dwith_xapian=false` :
+```bash
+meson . build -Dwith_xapian=false
+ninja -C build
+```
+
+If Libzim is compiled without Xapian, all search API are removed.  You
+can test if an installed version of Libzim is compiled with or without
+xapian by testing the define `LIBZIM_WITH_XAPIAN`.
+
+Testing
+-------
+
+ZIM files needed by unit-tests are not included in this repository. By
+default, Meson will use an internal directory in your build directory,
+but you can specify another directory with option `test_data_dir`:
+```bash
+meson . build -Dtest_data_dir=<A_DIR_WITH_TEST_DATA>
+```
+
+Whether you specify a directory or not, you need an extra step to
+download the data. At choice:
+* Get the data from the repository
+  [openzim/zim-testing-suite](https://github.com/openzim/zim-testing-suite)
+  and put it yourself in the directory.
+* Use the script
+  [download_test_data.py](scripts/download_test_data.py) which will
+  download and extract the data for you.
+* Ask `ninja` to do it for you with `ninja download_test_data` once the
+  project is configured.
+
+The simple workflow is:
+```bash
+meson . build # Configure the project (using default directory for test data)
+cd build
+ninja # Build
+ninja download_test_data # Download the test data
+meson test # Test
+```
+
+It is possible to deactivate all tests using test data zim files by
+passing `none` to the `test_data_dir` option:
+```bash
+meson . build -Dtest_data_dir=none
+cd build
+ninja
+meson test # Run all tests except those needing test zim files.
+```
+
+If the automated tests fail or timeout, you need to be aware that some
+tests need up to 16GB of memory. You can skip those specific tests with:
+```bash
+SKIP_BIG_MEMORY_TEST=1 meson test
+```
+
+Installation
+------------
+
+If you want to install the Libzim and the headers you just have
+compiled on your system, here we go:
+```bash
+ninja -C build install
+```
+
+You might need to run the command as root (or using `sudo`), depending
+where you want to install the libraries. After the installation
+succeeded, you may need to run ldconfig (as root).
+
+Uninstallation
+--------------
+
+If you want to uninstall the Libzim:
+```bash
+ninja -C build uninstall
+```
+
+Like for the installation, you might need to run the command as root
+(or using `sudo`).
+
+Troubleshooting
+---------------
+
+If you need to install Meson "manually":
+```bash
+virtualenv -p python3 ./ # Create virtualenv
+source bin/activate      # Activate the virtualenv
+pip3 install meson       # Install Meson
+hash -r                  # Refresh bash paths
+```
+
+If you need to install Ninja "manually":
+```bash
+git clone https://github.com/ninja-build/ninja.git
+cd ninja
+git checkout release
+./configure.py --bootstrap
+mkdir ../bin
+cp ninja ../bin
+cd ..
+```
+
+If the compilation still fails, you might need to get a more recent
+version of a dependency than the one packaged by your Linux
+distribution. Try then with a source tarball distributed by the
+problematic upstream project or even directly from the source code
+repository.
+
+License
+-------
+
+[GPLv2](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) or
+later, see [COPYING](COPYING) for more details.
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..9385d6f
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,5 @@
+libzim (0.0.0) unstable; urgency=medium
+
+  * Initial release.
+
+ -- Kunal Mehta <legoktm@debian.org>  Tue, 02 Jun 2020 01:49:48 -0700
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..512b9aa
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,67 @@
+Source: libzim
+Section: libs
+Priority: optional
+Build-Depends: debhelper-compat (= 13),
+ liblzma-dev,
+ libicu-dev,
+ libxapian-dev,
+ libzstd-dev,
+ uuid-dev,
+ libgtest-dev,
+ meson,
+ ninja-build,
+ pkg-config
+Maintainer: Kiwix team <kiwix@kiwix.org>
+Homepage: https://www.openzim.org/wiki/Libzim
+Standards-Version: 4.4.1
+Rules-Requires-Root: no
+
+Package: libzim7
+Architecture: any
+Multi-Arch: same
+Depends: ${misc:Depends},
+ ${shlibs:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Conflicts: libzim0, libzim0v5, libzim2, libzim4, libzim5
+Replaces: libzim0, libzim0v5, libzim2, libzim4, libzim5
+Description: library implementation of ZIM specifications
+ ZIM (Zeno IMproved) is an open file format for storing the contents of
+ wiki for offline usage. This file format is primarily focused on
+ providing the contents of Wikipedia and Wikimedia projects for offline
+ use.
+ .
+ libzim is the standard implementation of ZIM specification, which
+ implements the read and write method for ZIM files.
+ .
+ ZIM is a file format created with focus on extracting and encoding data
+ from Mediawiki for offline use.
+ .
+ Features of libzim are:
+  * Native, coded in C++
+  * Extremely fast
+  * Minimal footprint
+  * Minimal dependencies
+  * Portable on most OS (Windows, Linux, Mac OS X)
+
+Package: libzim-dev
+Section: libdevel
+Architecture: any
+Depends: ${misc:Depends},
+ libzim7 (= ${binary:Version}),
+ liblzma-dev,
+ libxapian-dev,
+ libicu-dev,
+ libzstd-dev
+Description: library implementation of ZIM specifications (development)
+ ZIM (Zeno IMproved) is an open file format for storing the contents of
+ wiki for offline usage. This file format is primarily focused on
+ providing the contents of Wikipedia and Wikimedia projects for offline
+ use.
+ .
+ libzim is the standard implementation of ZIM specification, which
+ implements the read and write method for ZIM files.
+ .
+ ZIM is a file format created with focus on extracting and encoding data
+ from Mediawiki for offline use.
+ .
+ This package contains development files.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..ff46366
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1 @@
+See COPYING in the repository root.
diff --git a/debian/libzim-dev.install b/debian/libzim-dev.install
new file mode 100644
index 0000000..1c1f0c5
--- /dev/null
+++ b/debian/libzim-dev.install
@@ -0,0 +1,3 @@
+usr/include/*
+usr/lib/*/libzim.so
+usr/lib/*/pkgconfig/*
\ No newline at end of file
diff --git a/debian/libzim7.install b/debian/libzim7.install
new file mode 100644
index 0000000..146d0ad
--- /dev/null
+++ b/debian/libzim7.install
@@ -0,0 +1 @@
+usr/lib/*/*.so.*
\ No newline at end of file
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..67b325d
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,15 @@
+#!/usr/bin/make -f
+export DEB_BUILD_MAINT_OPTIONS = hardening=+all
+
+# Skip some extremely memory-intensive tests
+export SKIP_BIG_MEMORY_TEST=1
+%:
+	dh $@ --buildsystem=meson
+
+# Skip tests that require zim-testing-data for now
+override_dh_auto_configure:
+	dh_auto_configure -- -Dtest_data_dir=none
+
+# Increase test timeout
+override_dh_auto_test:
+	dh_auto_test -- -t 3
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..89ae9db
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (native)
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 0000000..3d8a6cd
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1,2 @@
+api
+xml
diff --git a/docs/6to7.rst b/docs/6to7.rst
new file mode 100644
index 0000000..e897c97
--- /dev/null
+++ b/docs/6to7.rst
@@ -0,0 +1,369 @@
+
+Libzim 7 transition guide
+=========================
+
+
+Libzim7 changes a lot of things in the API and in the way we use namespaces (reflected in the API changes).
+This document is a guide to help with the transition from libzim6 to libzim7.
+
+Namespace handling
+------------------
+
+In libzim6 namespaces were exposed to the user. It was up to the user to handle them correctly.
+Libzim6 did not make any assumptions about the namespaces.
+However, the usage (mainly from libkiwix) was to store metadata in ``M`` namespace, articles in ``A`` and image/video in ``I``.
+
+On the opposite side, libzim7 hides the concept of namespace and handles it for the user.
+While namespaces are still present and used in the zim format, they have vanished from the libzim api.
+For information (but it is not important to use libzim), we now store all "user content" in ``C`` namespace.
+Metadata are stored in ``M`` namespace and we use few other (``X``, ``W``) for some internal content.
+
+"User content" are accessed using "classic" method to get content.
+Metadata, illustration and such are accessed using specific method.
+
+An article stored in ``A`` namespace before ("A/index.html") is now accessed simply using "index.html".
+(It is stored in "C/index.html" in new format, but you must not specify the namespace in the new api).
+
+Compatibility
+-------------
+
+libzim6 is agnostic about the namespaces. They are exposed to the user, whether we are
+reading a new or an old zim file. It is up to the user to correctly handle namespaces
+(mainly, content is now in ``C`` instead of ``A``/``I``).
+
+libzim7 tries to be smart about the transition. It will look in the right namespace, depending
+on the zim file.
+Accessing "index.html" should work whether the file uses the old or the new namespace scheme.
+
+Accessing article/entry
+-----------------------
+
+Getting one entry
+.................
+
+
+In libzim6 accessing an ``Article`` was done using a ``File`` instance.
+You then had to check for the `Article` validity before using it.
+
+ .. code-block:: c++
+
+    auto f = zim::File("foo.zim");
+    auto a = f.getArticleByUrl("A/index.html");
+    if (!a.good()) {
+      std::cerr << "No article \"A/index.html\"" << std::endl;
+    }
+
+In libzim7 you access a |Entry| using a |Archive| instance.
+If the entry is not found, an exception is raised.
+
+ .. code-block:: c++
+
+    auto a = zim::Archive("foo.zim");
+    try {
+      auto e = a.getEntryByPath("index.html");
+    } catch (zim::EntryNotFound& e) {
+      std::cerr << "No entry \"index.html\"" << std::endl;
+    }
+
+
+Redirection
+...........
+
+
+An article in libzim6 may be a redirection to another article or an article containing data.
+You had to check the kind of the article before using the right set of methods.
+Using a method on a wrong kind was undefined behavior.
+
+ .. code-block:: c++
+
+    auto article = [...];
+    if (article.isRedirect()) {
+      auto target = article.getRedirectArticle();
+    } else {
+      auto blob = article.getData();
+    }
+
+
+In libzim7, |Entry| is a kind of intermediate structure, either redirecting to another entry or pointing to an item.
+A |Item| is the structure containing the data.
+
+ .. code-block:: c++
+
+    auto entry = [...];
+    if (entry.isRedirect()) {
+      auto target = entry.getRedirectEntry();
+    } else {
+      auto item = entry.getItem();
+      auto blob = item.getData();
+    }
+
+
+As a common usage is to get the item associated to the entry while resolving the redirection chain,
+it is possible to do this easily :
+
+.. code-block:: c++
+
+    auto entry = [...];
+    // Resolve any redirection chain and return the final item.
+    auto item = entry.getItem(true);
+    auto blob = item.getData();
+
+Iteration
+.........
+
+To iterate on articles with libzim6 you had to use the ``begin*`` methods to get an iterator.
+You may iterate until ``end()`` was reached.
+
+ .. code-block:: c++
+
+    auto file = [...];
+    for(auto it = file.beginByUrl(); it!=file.end(); it++) {
+      auto article = *it;
+      [...]
+    }
+
+
+If you wanted to iterate on article starting by a url prefix it was a bit more complex :
+
+ .. code-block:: c++
+
+    auto file = [...];
+    auto it = file.find("A/ind");
+    while(!it.is_end() && it->getUrl().startWith("A/ind")) {
+      auto article = *it;
+      [...]
+      it++;
+    }
+
+
+In libzim7 you get |EntryRange| on which you can easily iterate on:
+
+ .. code-block:: c++
+
+    auto archive = [...];
+    for(auto entry : archive.iterByPath()) {
+      [...]
+    }
+
+ .. code-block:: c++
+
+    auto archive = [...];
+    for(auto entry : archive.findByPath("ind")) {
+      [...]
+    }
+
+Searching
+---------
+
+In libzim6 searching was done using a single class, ``Search``:
+
+ .. code-block:: c++
+
+    auto f = zim::File("foo.zim");
+    auto search = zim::Search(&f);
+    search.set_query("bar");
+    search.set_range(10, 30);
+    for (auto it =search.begin(); it!=search.end(); it++)
+    {
+      std::cout << "Found result " << it.get_url() << std::endl;
+    }
+
+In libzim7 you search starting from a |Searcher|.
+
+ .. code-block:: c++
+
+    // Create a searcher, something to search on an archive
+    zim::Searcher searcher(archive);
+
+    // We need a query to specify what to search for
+    zim::Query query;
+    query.setQuery("bar");
+
+    // Create a search for the specified query
+    zim::Search search = searcher.search(query);
+
+    // Now we can get some result from the search.
+    // 20 results starting from offset 10 (from 10 to 30)
+    zim::SearchResultSet results = search.getResults(10, 20);
+
+    // SearchResultSet is iterable
+    for(auto entry: results) {
+      std::cout << entry.getPath() << std::endl;
+    }
+
+While it may seem a bit more complex (and it is), it has the main advantage of allowing
+the different instances to be reused:
+
+- |Searcher| is what we are searching on, we can do several searches on it without recreating an internal xapian database.
+- |Query| is what we are searching for.
+- |Search| is a specific search.
+- |SearchResultSet| is a set of results for a |Search|, it allows getting particular results without having to search several times.
+
+Suggestion
+----------
+
+In libzim6 suggestion was made using the same class ``Search`` but by setting the suggestion mode before
+iterating on the results.
+
+ .. code-block:: c++
+
+    auto f = zim::File("foo.zim");
+    auto search = zim::Search(&f);
+    search.set_query("bar");
+    search.set_range(10, 30);
+    search.set_suggestion_mode(true); // <<<
+    for (auto it =search.begin(); it!=search.end(); it++)
+    {
+      std::cout << "Found result " << it.get_url() << std::endl;
+    }
+
+If the zim file had no suggestion database, the suggestion search was made on full text database
+(with variable results).
+
+In libzim7 you do suggestion using |SuggestionSearcher| API :
+
+ .. code-block:: c++
+
+    // Create a searcher, something to search on an archive
+    zim::SuggestionSearcher searcher(archive);
+
+    // Create a search for the specified query
+    zim::SuggestionSearch search = searcher.search("bar");
+
+    // Now we can get some result from the search.
+    // 20 results starting from offset 10 (from 10 to 30)
+    zim::SuggestionResultSet results = search.getResults(10, 20);
+
+    // SuggestionResultSet is iterable
+    for(auto entry: results) {
+      std::cout << entry.getPath() << std::endl;
+    }
+
+
+Creating a zim file
+-------------------
+
+Creating a zim file with libzim6 was pretty complex.
+One had to inherit from ``zim::writer::Creator`` to provide the main url.
+Then one had to inherit from ``zim::writer::Article`` to be able to add different kinds of articles to the zim file.
+
+ .. code-block:: c++
+
+    class MyCreator: public zim::writer::Creator {
+      Url getMainUrl() const { return Url('A', "index.html"); }
+    };
+
+    class RedirectArticle : public zim::writer::Article {
+      public:
+        RedirectArticle(const std::string& title, const std::string& url, const std::string& target) 
+          : title(title),
+            url(url),
+            target(target)
+            {}
+
+        bool isRedirect() const { return true; }
+        zim::writer::Url getUrl() const { return url; }
+        std::string getTitle() const { return title; }
+       zim::writer::Url getRedirectUrl()  const { return target; }
+
+      private:
+        std::string title;
+        std::string url;
+        std::string target;
+    };
+
+    class ContentArticle: public zim::writer::Article {
+      ContentArticle(const std::string& title, const std::string& url, const std::string& mimetype, const std::string& content)
+            : title(title),
+            url(url),
+            mimetype(mimetype),
+            content(content)
+            {}
+
+        bool isRedirect() const { return false; }
+        zim::writer::Url getUrl() const { return url; }
+        std::string getTitle() const { return title; }
+        std::string getMimeType() const { return mimetype; }
+        Blob getData() const { return Blob(content.data(), content.size()); }
+      private:
+        std::string title;
+        std::string url;
+        std::string mimetype;
+        std::string content;
+    };
+
+    int main() {
+      MyCreator creator();
+      creator.startZimCreation("out_file.zim");
+      std::shared_ptr<zim::writer::Article> article = std::make_shared<ContentArticle>("A article", "A/article", "text/html", "A content");
+      creator.addArticle(article);
+      std::shared_ptr<zim::writer::Article> redirect = std::make_shared<RedirectArticle>("A redirect", "A/redirect", "A/article");
+      creator.addArticle(redirect);
+      creator.finishZimCreation();
+    }
+
+In libzim7, you don't have to inherit from |Creator|.
+Redirections and metadata are added using |addRedirection| and |addMetadata|.
+You may still have to inherit from |WriterItem|, but default implementations
+are provided (|StringItem|, |FileItem|).
+
+ .. code-block:: c++
+
+    int main() {
+      zim::writer::Creator creator;
+      creator.startZimCreation("out_file.zim");
+      creator.addRedirection("redirect", "A redirect", "article");
+      std::shared_ptr<zim::writer::Item> item = std::make_shared<StringItem>("article", "text/html", "A article", {}, "A content");
+      creator.addItem(item);
+      creator.finishZimCreation();
+    }
+
+Metadata and Illustration
+.........................
+
+Metadata are added using |addMetadata|.
+You don't have to create a specific item in ``M`` namespace.
+
+The creator now creates the ``M/Counter`` metadata for you. You don't have to (and must not) add a ``M/Counter`` yourself.
+
+Favicon has been deprecated in favor of Illustration.
+In libzim6, you had to add a file in ``I`` namespace and add a ``-/favicon`` redirection to the file.
+In libzim7, you have to use the |addIllustration| method.
+
+
+Hints
+.....
+
+Hints are a new concept in libzim7.
+This is a generic way to pass information to the creator about how to handle item/redirection.
+
+An almost mandatory hint to pass is the hint ``FRONT_ARTICLE`` (|HintKeys|).
+``FRONT_ARTICLE`` marks an entry (item or redirection) as a main article for the reader
+(typically an html page, as opposed to a resource file such as css, js, ...).
+The random and suggestion features will search only in entries marked as ``FRONT_ARTICLE``.
+If no entry is marked as ``FRONT_ARTICLE``, all entries will be used.
+
+ .. Declare some replacement helpers
+
+ .. |Archive| replace:: :class:`zim::Archive`
+ .. |EntryRange| replace:: :class:`zim::Archive::EntryRange`
+ .. |Entry| replace:: :class:`zim::Entry`
+ .. |Item| replace:: :class:`zim::Item`
+ .. |EntryNotFound| replace:: :class:`zim::EntryNotFound`
+ .. |Searcher| replace:: :class:`zim::Searcher`
+ .. |Search| replace:: :class:`zim::Search`
+ .. |Query| replace:: :class:`zim::Query`
+ .. |SearchResultSet| replace:: :class:`zim::SearchResultSet`
+ .. |SuggestionSearcher| replace:: :class:`zim::SuggestionSearcher`
+ .. |getEntryByPath| replace:: :func:`getEntryByPath<void zim::Archive::getEntryByPath(const std::string&) const>`
+ .. |getEntryByTitle| replace:: :func:`getEntryByTitle<void zim::Archive::getEntryByTitle(const std::string&) const>`
+ .. |findByPath| replace:: :func:`findByPath<zim::Archive::findByPath>`
+ .. |findByTitle| replace:: :func:`findByTitle<zim::Archive::findByTitle>`
+ .. |Creator| replace:: :class:`zim::writer::Creator`
+ .. |WriterItem| replace:: :class:`zim::writer::Item`
+ .. |StringItem| replace:: :class:`zim::writer::StringItem`
+ .. |FileItem| replace:: :class:`zim::writer::FileItem`
+ .. |addMetadata| replace:: :func:`addMetadata<zim::writer::Creator::addMetadata>`
+ .. |addRedirection| replace:: :func:`addRedirection<zim::writer::Creator::addRedirection>`
+ .. |addIllustration| replace:: :func:`addIllustration<zim::writer::Creator::addIllustration>`
+ .. |HintKeys| replace:: :enum:`zim::writer::HintKeys`
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..ab420ab
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,72 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+
+# -- Project information -----------------------------------------------------
+
+project = 'libzim'
+copyright = '2020, libzim-team'
+author = 'libzim-team'
+
+
+# -- General configuration ---------------------------------------------------
+
+on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'breathe',
+    'exhale'
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+
+if not on_rtd:
+    html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+breathe_projects = {
+    "libzim": "./xml"
+}
+breathe_default_project = 'libzim'
+
+exhale_args = {
+    "containmentFolder": "./api",
+    "rootFileName": "ref_api.rst",
+    "rootFileTitle": "Reference API",
+    "doxygenStripFromPath": "..",
+    "treeViewIsBootstrap": True,
+    "createTreeView" : True,
+    "exhaleExecutesDoxygen": True,
+    "exhaleDoxygenStdin": "INPUT = ../include"
+}
+
+primary_domain = 'cpp'
+
+highlight_language = 'cpp'
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..a2412dd
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,15 @@
+.. libzim documentation master file, created by
+   sphinx-quickstart on Fri Jul 24 15:40:50 2020.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to libzim's documentation!
+==================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   usage
+   6to7
+   api/ref_api
diff --git a/docs/meson.build b/docs/meson.build
new file mode 100644
index 0000000..71d4fc5
--- /dev/null
+++ b/docs/meson.build
@@ -0,0 +1,7 @@
+
+sphinx = find_program('sphinx-build', native:true)
+
+sphinx_target = run_target('doc',
+    command: [sphinx, '-bhtml',
+              meson.current_source_dir(),
+              meson.current_build_dir()])
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..d805c25
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+breathe
+exhale
+sphinx<4
diff --git a/docs/usage.rst b/docs/usage.rst
new file mode 100644
index 0000000..d4d4d1f
--- /dev/null
+++ b/docs/usage.rst
@@ -0,0 +1,168 @@
+Libzim programming
+==================
+
+Introduction
+------------
+
+libzim is written in C++. To use the library, you need the include files of libzim and have
+to link against libzim.
+
+Errors are handled with exceptions. When something goes wrong, libzim throws an error,
+which is always derived from std::exception.
+
+All classes are defined in the namespace zim.
+Copying is allowed and tried to make as cheap as possible.
+The reading part of the libzim is most of the time thread safe.
+Searching and creating part are not. You have to serialize access to the class yourself.
+
+The main class, which accesses an archive, is |Archive|.
+It actually holds a reference to an implementation, so that copies of the class just reference the same file.
+You open a file by passing the file name to the constructor as a std::string.
+
+Iterating over entries is made by creating a |EntryRange|.
+
+.. code-block:: c++
+
+    #include <zim/archive.h>
+    #include <zim/entry.h>
+    #include <iostream>
+    int main(int argc, char* argv[])
+    {
+      try
+      {
+        zim::Archive a("wikipedia.zim");
+
+        for (auto entry: a.iterByPath()) {
+          std::cout << "path: " << entry.getPath() << " title: " << entry.getTitle() << std::endl;
+        }
+      } catch (const std::exception& e) {
+        std::cerr << e.what() << std::endl;
+      }
+    }
+
+In subsequent examples, only code needed to use the library will be explained.
+The main-function with the error catcher should always be in place.
+
+Getting entries
+---------------
+
+Entries are addressed either by path or title.
+
+|Archive| has methods |getEntryByPath| and |getEntryByTitle|. Both take one parameter: a string, which specifies the path or the title of the entry to get.
+They return a |Entry|.
+If the entry cannot be found, they throw the exception |EntryNotFound|.
+
+An entry is an entry point in an archive for "things". It can be a redirection to another entry or a |Item|.
+
+  .. code-block:: c++
+
+    auto entry = archive.getEntryByPath("foo");
+    if (entry.isRedirect()) {
+      std::cout << "This is a redirection to " << entry.getRedirectEntry().getPath() << std::endl;
+    } else {
+      std::cout << "This is a item with content : " << entry.getItem().getData() << std::endl;
+    }
+
+As it is pretty common to resolve potential entry redirection and get the final item, you can do it directly using `getItem` :
+
+  .. code-block:: c++
+
+    auto entry = archive.getEntryByPath("foo");
+    auto item = entry.getItem(true);
+    if (entry.isRedirect()) {
+      std::cout << "Entry " << entry.getPath() << " is a entry pointing to the item " << item.getPath() << std::endl;
+    } else {
+      std::cout << entry.getPath() << " should be equal to " << item.getPath() << std::endl;
+    }
+    std::cout << "The item data is " << item.getData() << std::endl;
+
+Finding entries
+---------------
+
+|getEntryByPath|/|getEntryByTitle| allow you to get an exact entry.
+But you may want to find entries using a looser method.
+|findByPath| and |findByTitle| allow you to find entries starting by the given path/title prefix.
+
+|findByPath|/|findByTitle| return a |EntryRange| you can iterate on :
+
+  .. code-block:: c++
+
+    for (auto entry: archive.findByPath("fo")) {
+      std::cout << "Entry " << entry.getPath() << " should start with fo." << std::endl;
+    }
+
+Searching for entries
+---------------------
+
+Finding entries by path/title is nice but you may want to search for entries based on their content.
+If the zim archive contains a full text index, you can search on it.
+
+The class |Searcher| allows searching on one or several |Archive|.
+It allows creating a |Search| which represents a particular search for a |Query|.
+From a |Search|, you can get a |SearchResultSet| on which you can iterate.
+
+ .. code-block:: c++
+
+    // Create a searcher, something to search on an archive
+    zim::Searcher searcher(archive);
+
+    // We need a query to specify what to search for
+    zim::Query query;
+    query.setQuery("bar");
+
+    // Create a search for the specified query
+    zim::Search search = searcher.search(query);
+
+    // Now we can get some result from the search.
+    // 20 results starting from offset 10 (from 10 to 30)
+    zim::SearchResultSet results = search.getResults(10, 20);
+
+    // SearchResultSet is iterable
+    for(auto entry: results) {
+      std::cout << entry.getPath() << std::endl;
+    }
+
+Searching for suggestions
+-------------------------
+
+While |findByTitle| may be a good start to search for suggestions, you may want to search for suggestions for a term
+in the middle of the suggestion.
+
+The suggestion API allows you to search for suggestions, using the suggestion database included in recent zim files.
+The suggestion API is pretty close to the search API:
+
+ .. code-block:: c++
+
+    // Create a searcher, something to search on an archive
+    zim::SuggestionSearcher searcher(archive);
+
+    // Create a search for the specified query
+    zim::SuggestionSearch search = searcher.search("bar");
+
+    // Now we can get some result from the search.
+    // 20 results starting from offset 10 (from 10 to 30)
+    zim::SuggestionResultSet results = search.getResults(10, 20);
+
+    // SuggestionResultSet is iterable
+    for(auto entry: results) {
+      std::cout << entry.getPath() << std::endl;
+    }
+
+If the zim file doesn't contain a suggestion database, the suggestion will fall back to |findByTitle| for you.
+
+ .. Declare some replacement helpers
+
+ .. |Archive| replace:: :class:`zim::Archive`
+ .. |EntryRange| replace:: :class:`zim::Archive::EntryRange`
+ .. |Entry| replace:: :class:`zim::Entry`
+ .. |Item| replace:: :class:`zim::Item`
+ .. |EntryNotFound| replace:: :class:`zim::EntryNotFound`
+ .. |Searcher| replace:: :class:`zim::Searcher`
+ .. |Search| replace:: :class:`zim::Search`
+ .. |Query| replace:: :class:`zim::Query`
+ .. |SearchResultSet| replace:: :class:`zim::SearchResultSet`
+ .. |getEntryByPath| replace:: :func:`getEntryByPath<void zim::Archive::getEntryByPath(const std::string&) const>`
+ .. |getEntryByTitle| replace:: :func:`getEntryByTitle<void zim::Archive::getEntryByTitle(const std::string&) const>`
+ .. |findByPath| replace:: :func:`findByPath<zim::Archive::findByPath>`
+ .. |findByTitle| replace:: :func:`findByTitle<zim::Archive::findByTitle>`
+
diff --git a/examples/createZimExample.cpp b/examples/createZimExample.cpp
new file mode 100644
index 0000000..998bff1
--- /dev/null
+++ b/examples/createZimExample.cpp
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2012 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <iostream>
+#include <sstream>
+#include <vector>
+
+#include <zim/writer/contentProvider.h>
+#include <zim/writer/creator.h>
+#include <zim/blob.h>
+
+class TestItem : public zim::writer::Item
+{
+    std::string _id;
+    std::string _data;
+
+  public:
+    TestItem()  { }
+    explicit TestItem(const std::string& id);
+    virtual ~TestItem() = default;
+
+    virtual std::string getPath() const;
+    virtual std::string getTitle() const;
+    virtual std::string getMimeType() const;
+
+    virtual std::unique_ptr<zim::writer::ContentProvider> getContentProvider() const;
+};
+
+TestItem::TestItem(const std::string& id)
+  : _id(id)
+{
+  std::ostringstream data;
+  data << "this is item " << id << std::endl;
+  _data = data.str();
+}
+
+std::string TestItem::getPath() const
+{
+  return std::string("A/") + _id;
+}
+
+std::string TestItem::getTitle() const
+{
+  return _id;
+}
+
+std::string TestItem::getMimeType() const
+{
+  return "text/plain";
+}
+
+std::unique_ptr<zim::writer::ContentProvider> TestItem::getContentProvider() const
+{
+  return std::unique_ptr<zim::writer::ContentProvider>(new zim::writer::StringProvider(_data));
+}
+
+int main(int argc, char* argv[])  // Writes "foo.zim" holding `max` small text items; argc/argv are unused.
+{
+  unsigned max = 16;
+  try {
+    zim::writer::Creator c;
+    c.configVerbose(false).configCompression(zim::Compression::Zstd);
+    c.startZimCreation("foo.zim");
+    for (unsigned n = 0; n < max; ++n) {
+      std::ostringstream id;
+      id << (n + 1);  // item ids (and so paths "A/<id>") run from 1 to max
+      auto article = std::make_shared<TestItem>(id.str());
+      c.addItem(article);
+    }
+    c.setMainPath("A/1");  // must name an item added above; "A/0" is never created
+    c.finishZimCreation();
+  }
+  catch (const std::exception& e)
+  {
+    std::cerr << e.what() << std::endl;
+    return 1;  // report the failure to the caller instead of exiting with 0
+  }
+}
+
diff --git a/examples/meson.build b/examples/meson.build
new file mode 100644
index 0000000..3b804e4
--- /dev/null
+++ b/examples/meson.build
@@ -0,0 +1,6 @@
+
+executable('createZimExample', 'createZimExample.cpp',
+           link_with: libzim,
+           link_args: extra_link_args,
+           include_directories: include_directory,
+           dependencies: [thread_dep, xapian_dep, icu_dep, lzma_dep, zstd_dep])
diff --git a/include/meson.build b/include/meson.build
new file mode 100644
index 0000000..a08d9e6
--- /dev/null
+++ b/include/meson.build
@@ -0,0 +1,3 @@
+subdir('zim')
+
+include_directory = include_directories('.')
diff --git a/include/zim/archive.h b/include/zim/archive.h
new file mode 100644
index 0000000..2650d08
--- /dev/null
+++ b/include/zim/archive.h
@@ -0,0 +1,634 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ARCHIVE_H
+#define ZIM_ARCHIVE_H
+
+#include "zim.h"
+#include "entry.h"
+#include "uuid.h"
+
+#include <string>
+#include <vector>
+#include <memory>
+#include <bitset>
+#include <set>
+
+namespace zim
+{
+  class FileImpl;
+
+  enum class EntryOrder {
+    pathOrder,      // entries sorted by path
+    titleOrder,     // entries sorted by title
+    efficientOrder  // an order meant for efficient iteration (not further specified here)
+  };
+
+  /**
+   * The Archive class to access content in a zim file.
+   *
+   * The `Archive` is the main class to access content in a zim file.
+   * `Archive` objects are lightweight and can be copied easily.
+   *
+   * An `Archive` is read-only, and its internal state (such as caches) is protected
+   * from race conditions. Therefore, all methods of `Archive` are threadsafe.
+   *
+   * All methods of `Archive` may throw a `ZimFileFormatError` if the file is invalid.
+   */
+  class Archive
+  {
+    public:
+      template<EntryOrder order> class EntryRange;
+      template<EntryOrder order> class iterator;
+
+      /** Archive constructor.
+       *
+       *  Construct an archive from a filename.
+       *  The file is open readonly.
+       *
+       *  The filename is the "logical" path.
+       *  So if you want to open a split zim file (foo.zimaa, foo.zimab, ...)
+       *  you must pass the `foo.zim` path.
+       *
+       *  @param fname The filename to the file to open (utf8 encoded)
+       */
+      explicit Archive(const std::string& fname);
+
+#ifndef _WIN32
+      /** Archive constructor.
+       *
+       *  Construct an archive from a file descriptor.
+       *
+       *  Note: This function is not available under Windows.
+       *
+       *  @param fd The descriptor of a seekable file representing a ZIM archive
+       */
+      explicit Archive(int fd);
+
+      /** Archive constructor.
+       *
+       *  Construct an archive from a descriptor of a file with an embedded ZIM
+       *  archive inside.
+       *
+       *  Note: This function is not available under Windows.
+       *
+       *  @param fd The descriptor of a seekable file with a continuous segment
+       *  representing a complete ZIM archive.
+       *  @param offset The offset of the ZIM archive relative to the beginning
+       *  of the file (rather than the current position associated with fd).
+       *  @param size The size of the ZIM archive.
+       */
+      Archive(int fd, offset_type offset, size_type size);
+#endif
+
+      /** Return the filename of the zim file.
+       *
+       *  Return the filename as passed to the constructor
+       *  (So foo.zim).
+       *
+       *  @return The logical filename of the archive.
+       */
+      const std::string& getFilename() const;
+
+      /** Return the logical archive size.
+       *
+       *  Return the size of the full archive, not the size of the file on the fs.
+       *  If the zim is split, return the sum of the size of the parts.
+       *
+       *  @return The logical size of the archive.
+       */
+      size_type getFilesize() const;
+
+      /** Return the number of entries in the archive.
+       *
+       * Return the total number of entries in the archive, including
+       * internal entries created by libzim itself, metadata, indexes, ...
+       *
+       *  @return the number of all entries in the archive.
+       */
+      entry_index_type getAllEntryCount() const;
+
+      /** Return the number of user entries in the archive.
+       *
+       * If the notion of "user entries" doesn't exist in the zim archive,
+       * returns `getAllEntryCount()`.
+       *
+       *  @return the number of user entries in the archive.
+       */
+      entry_index_type getEntryCount() const;
+
+      /** Return the number of articles in the archive.
+       *
+       *  The definition of "article" depends on the zim archive.
+       *  On recent archives, this corresponds to all entries marked as "FRONT_ARTICLE"
+       *  at creation time.
+       *  On old archives, this corresponds to all entries in the 'A' namespace.
+       *  A few archives may have been created without namespaces but also without a specific
+       *  article listing. In this case, articles are all user entries.
+       *
+       *  @return the number of articles in the archive.
+       */
+      entry_index_type getArticleCount() const;
+
+      /** The uuid of the archive.
+       *
+       *  @return the uuid of the archive.
+       */
+      Uuid getUuid() const;
+
+      /** Get a specific metadata content.
+       *
+       *  Get the content of a metadata stored in the archive.
+       *
+       *  @param name The name of the metadata.
+       *  @return The content of the metadata.
+       *  @exception EntryNotFound If the metadata is not in the archive.
+       */
+      std::string getMetadata(const std::string& name) const;
+
+      /** Get a specific metadata item.
+       *
+       *  Get the item associated to a metadata stored in the archive.
+       *
+       *  @param name The name of the metadata.
+       *  @return The item associated to the metadata.
+       *  @exception EntryNotFound If the metadata is not in the archive.
+       */
+      Item getMetadataItem(const std::string& name) const;
+
+      /** Get the list of metadata stored in the archive.
+       *
+       *  @return The list of metadata in the archive.
+       */
+      std::vector<std::string> getMetadataKeys() const;
+
+      /** Get the illustration item of the archive.
+       *
+       *  An illustration is an icon for the archive that can be used in catalogs and elsewhere to illustrate the archive.
+       *
+       *  @param size The size (width and height) of the illustration to get. Default to 48 (48x48px icon)
+       *  @return The illustration item.
+       *  @exception EntryNotFound If no illustration item can be found.
+       */
+      Item getIllustrationItem(unsigned int size=48) const;
+
+      /** Return a list of available sizes (width) for the illustrations in the archive.
+       *
+       * An illustration is an icon for the archive that can be used in catalogs and elsewhere to illustrate the archive.
+       * An Archive may contain several illustrations with different sizes.
+       * This method tells which illustrations are in the archive (by size: width).
+       *
+       * @return The set of available sizes.
+       */
+      std::set<unsigned int> getIllustrationSizes() const;
+
+
+      /** Get an entry using its "path" index.
+       *
+       *  Use the index of the entry to get the idx'th entry
+       *  (entry being sorted by path).
+       *
+       *  @param idx The index of the entry.
+       *  @return The Entry.
+       *  @exception std::out_of_range If idx is greater than the number of entry.
+       */
+      Entry getEntryByPath(entry_index_type idx) const;
+
+      /** Get an entry using a path.
+       *
+       *  Get an entry using its path.
+       *  The path must contains the namespace.
+       *
+       *  @param path The entry's path.
+       *  @return The Entry.
+       *  @exception EntryNotFound If no entry has the asked path.
+       */
+      Entry getEntryByPath(const std::string& path) const;
+
+      /** Get an entry using its "title" index.
+       *
+       *  Use the index of the entry to get the idx'th entry
+       *  (entry being sorted by title).
+       *
+       *  @param idx The index of the entry.
+       *  @return The Entry.
+       *  @exception std::out_of_range If idx is greater than the number of entry.
+       */
+      Entry getEntryByTitle(entry_index_type idx) const;
+
+      /** Get an entry using a title.
+       *
+       *  Get an entry using its title.
+       *
+       *  @param title The entry's title.
+       *  @return The Entry.
+       *  @exception EntryNotFound If no entry has the asked title.
+       */
+      Entry getEntryByTitle(const std::string& title) const;
+
+      /** Get an entry using its "cluster" index.
+       *
+       *  Use the index of the entry to get the idx'th entry.
+       *  The actual order of the entries is not really specified.
+       *  It is inferred from the internal way the entries are stored.
+       *
+       *  This method is probably not relevant and is provided for completeness.
+       *  You should probably use an iterator using the `efficientOrder`.
+       *
+       *  @param idx The index of the entry.
+       *  @return The Entry.
+       *  @exception std::out_of_range If idx is greater than the number of entry.
+       */
+      Entry getEntryByClusterOrder(entry_index_type idx) const;
+
+      /** Get the main entry of the archive.
+       *
+       *  @return The Main entry.
+       *  @exception EntryNotFound If no main entry has been specified in the archive.
+       */
+      Entry getMainEntry() const;
+
+      /** Get a random entry.
+       *
+       * The entry is picked randomly from the front article list.
+       *
+       * @return A random entry.
+       * @exception EntryNotFound If no valid random entry can be found.
+       */
+      Entry getRandomEntry() const;
+
+      /** Check whether the archive has an entry with the given path.
+       *
+       *  @param path The entry's path.
+       *  @return True if the lookup succeeds; false if it throws anything.
+       */
+      bool hasEntryByPath(const std::string& path) const {
+        // Every exception from the lookup, not only EntryNotFound, means "absent".
+        try { getEntryByPath(path); }
+        catch(...) { return false; }
+        return true;
+      }
+
+      /** Check whether the archive has an entry with the given title.
+       *
+       *  @param title The entry's title.
+       *  @return True if the lookup succeeds; false if it throws anything.
+       */
+      bool hasEntryByTitle(const std::string& title) const {
+        // Every exception from the lookup, not only EntryNotFound, means "absent".
+        try { getEntryByTitle(title); }
+        catch(...) { return false; }
+        return true;
+      }
+
+      /** Check if archive has a main entry
+       *
+       * @return True if the archive has a main entry.
+       */
+      bool hasMainEntry() const;
+
+      /** Check if the archive has an illustration of the given size.
+       *
+       * @param size The size (width and height) of the illustration to check. Defaults to 48 (48x48px icon).
+       * @return True if the archive has a corresponding illustration entry.
+       *         (Always true if the archive has no illustration entry but does have a favicon.)
+       */
+      bool hasIllustration(unsigned int size=48) const;
+
+      /** Check if the archive has a fulltext index.
+       *
+       * @return True if the archive has a fulltext index
+       */
+      bool hasFulltextIndex() const;
+
+      /** Check if the archive has a title index.
+       *
+       * @return True if the archive has a title index
+       */
+      bool hasTitleIndex() const;
+
+
+      /** Get a "iterable" by path order.
+       *
+       *  This method allow to iterate on all user entries using a path order.
+       *  If the notion of "user entries" doesn't exists (for old zim archive),
+       *  this iterate on all entries in the zim file.
+       *
+       *  ```
+       *  for(auto& entry:archive.iterByPath()) {
+       *     ...
+       *  }
+       *  ```
+       *
+       *  @return A range on all the entries, in path order.
+       */
+      EntryRange<EntryOrder::pathOrder> iterByPath() const;
+
+      /** Get a "iterable" by title order.
+       *
+       *  This method allows iterating over all articles in title order.
+       *  The definition of "article" depends on the zim archive.
+       *  On recent archives, this corresponds to all entries marked as "FRONT_ARTICLE"
+       *  at creation time.
+       *  On old archives, this corresponds to all entries in the 'A' namespace.
+       *  A few archives may have been created without namespaces but also without a specific
+       *  article listing. In this case, this iterates over all user entries.
+       *
+       *  ```
+       *  for(auto& entry:archive.iterByTitle()) {
+       *     ...
+       *  }
+       *  ```
+       *
+       *  @return A range on all the entries, in title order.
+       */
+      EntryRange<EntryOrder::titleOrder> iterByTitle() const;
+
+      /** Get an "iterable" in an efficient order.
+       *
+       *  This method allows iterating over all user entries in an efficient order.
+       *  If the notion of "user entries" doesn't exist (for old zim archives),
+       *  this iterates over all entries in the zim file.
+       *
+       *  ```
+       *  for(auto& entry:archive.iterEfficient()) {
+       *     ...
+       *  }
+       *  ```
+       *
+       *  @return A range on all the entries, in efficient order.
+       */
+      EntryRange<EntryOrder::efficientOrder> iterEfficient() const;
+
+      /** Find a range of entries starting with path.
+       *
+       * The path is the "long path". (Ie, with the namespace)
+       *
+       * @param path The path prefix to search for.
+       * @return A range starting from the first entry starting with path
+       *         and ending past the last entry.
+       *         If no entry starts with `path`, begin == end.
+       */
+      EntryRange<EntryOrder::pathOrder>  findByPath(std::string path) const;
+
+      /** Find a range of entries starting with title.
+       *
+       * The entry title is searched in the `A` namespace.
+       *
+       * @param title The title prefix to search for.
+       * @return A range starting from the first entry starting with title
+       *         and ending past the last entry.
+       *         If no entry starts with `title`, begin == end.
+       */
+      EntryRange<EntryOrder::titleOrder> findByTitle(std::string title) const;
+
+      /** hasChecksum.
+       *
+       * The checksum is not the checksum of the file.
+       * It is an internal checksum stored in the zim file.
+       *
+       * @return True if the archive has a checksum.
+       */
+      bool hasChecksum() const;
+
+      /** getChecksum.
+       *
+       * @return the checksum stored in the archive.
+       *         If the archive has no checksum return an empty string.
+       */
+      std::string getChecksum() const;
+
+      /** Check that the zim file is valid (in regard to its checksum).
+       *
+       *  If the zim file has no checksum return false.
+       *
+       *  @return True if the file is valid.
+       */
+      bool check() const;
+
+      /** Check the integrity of the zim file.
+       *
+       * Run the requested type of check to verify the zim file is valid
+       * (in regard to the zim format). This may be time consuming.
+       *
+       * @param checkType The integrity check to run.
+       * @return True if the file is valid.
+       */
+      bool checkIntegrity(IntegrityCheck checkType);
+
+      /** Check if the file is split in the filesystem.
+       *
+       *  @return True if the archive is split in different files (foo.zimaa, foo.zimab).
+       */
+      bool isMultiPart() const;
+
+      /** Get if the zim archive uses the new namespace scheme.
+       *
+       * Recent zim file use the new namespace scheme.
+       *
+       * On user perspective, it means that :
+       * - On old namespace scheme :
+       *  . All entries are accessible, either using `getEntryByPath` with a specific namespace
+       *    or simply iterating over the entries (with `iter*` methods).
+       *  . Entry's path has namespace included ("A/foo.html")
+       * - On new namespace scheme :
+       *  . Only the "user" entries are accessible with `getEntryByPath` and `iter*` methods.
+       *    To access metadatas, use `getMetadata` method.
+       *  . Entry's path do not contains namespace ("foo.html")
+       */
+      bool hasNewNamespaceScheme() const;
+
+      /** Get a shared ptr on the FileImpl
+       *
+       *  @internal
+       *  @return A copy of the shared_ptr held by this archive (`m_impl`).
+       */
+      std::shared_ptr<FileImpl> getImpl() const { return m_impl; }
+
+#ifdef ZIM_PRIVATE
+      cluster_index_type getClusterCount() const;
+      offset_type getClusterOffset(cluster_index_type idx) const;
+      entry_index_type getMainEntryIndex() const;
+#endif
+
+    private:
+      std::shared_ptr<FileImpl> m_impl;
+  };
+
+  // Converts an index expressed in `order` into the index `Archive::iterator` hands to `Entry` (presumably the path-order index).
+  template<EntryOrder order>
+  entry_index_type _toPathOrder(const FileImpl& file, entry_index_type idx);
+
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::pathOrder>(const FileImpl& file, entry_index_type idx);
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::titleOrder>(const FileImpl& file, entry_index_type idx);
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::efficientOrder>(const FileImpl& file, entry_index_type idx);
+
+  /**
+   * A range of entries in an `Archive`.
+   *
+   * `EntryRange` represents the half-open index range [begin, end) of entries
+   * in a specific order. `offset()` returns a new range instead of changing
+   * this one, so an `EntryRange` is never modified and is consequently threadsafe.
+   */
+  template<EntryOrder order>
+  class Archive::EntryRange {
+    public:
+      explicit EntryRange(const std::shared_ptr<FileImpl> file, entry_index_type begin, entry_index_type end)
+        : m_file(file),
+          m_begin(begin),
+          m_end(end)
+      {}
+
+      iterator<order> begin() const
+        { return iterator<order>(m_file, entry_index_type(m_begin)); }
+      iterator<order> end() const
+        { return iterator<order>(m_file, entry_index_type(m_end)); }
+      int size() const  // number of entries in the range
+        { return m_end - m_begin; }
+
+      EntryRange<order> offset(int start, int maxResults) const
+      {
+        // Sub-range skipping `start` entries and keeping at most `maxResults`.
+        // Both are clamped to [0, remaining] so index arithmetic cannot wrap.
+        const entry_index_type total = m_end - m_begin;
+        const entry_index_type skip =
+          start <= 0 ? 0 : (entry_index_type(start) < total ? entry_index_type(start) : total);
+        const entry_index_type left = total - skip;
+        const entry_index_type take =
+          maxResults <= 0 ? 0 : (entry_index_type(maxResults) < left ? entry_index_type(maxResults) : left);
+        return EntryRange<order>(m_file, m_begin + skip, m_begin + skip + take);
+      }
+
+    private:
+      std::shared_ptr<FileImpl> m_file;
+      entry_index_type m_begin;
+      entry_index_type m_end;
+  };
+
+  /**
+   * An iterator on an `Archive`.
+   *
+   * `Archive::iterator` lazily caches the `Entry` it points to; that internal
+   * state is not protected from race-condition, so it is not threadsafe.
+   *
+   * (The `EntryRange` it comes from can't be modified and is threadsafe.)
+   */
+  template<EntryOrder order>
+  class Archive::iterator : public std::iterator<std::bidirectional_iterator_tag, Entry>
+  {
+    public:
+      explicit iterator(const std::shared_ptr<FileImpl> file, entry_index_type idx)
+        : m_file(file),
+          m_idx(idx),
+          m_entry(nullptr)  // the Entry is only built on first dereference
+      {}
+
+      iterator(const iterator<order>& other)
+        : m_file(other.m_file),
+          m_idx(other.m_idx),
+          m_entry(other.m_entry?new Entry(*other.m_entry):nullptr)  // own copy of the cache
+      {}
+
+      bool operator== (const iterator<order>& it) const  // same file and same index
+        { return m_file == it.m_file && m_idx == it.m_idx; }
+      bool operator!= (const iterator<order>& it) const
+        { return !operator==(it); }
+
+      iterator<order>& operator=(iterator<order>&& it) = default;
+
+      iterator<order>& operator=(const iterator<order>& it)  // const&: const sources are assignable too
+      {
+        m_entry.reset();  // the cached Entry is rebuilt lazily for the new position
+        m_idx = it.m_idx;
+        m_file = it.m_file;
+        return *this;
+      }
+
+      iterator<order>& operator++()
+      {
+        ++m_idx;
+        m_entry.reset();  // drop the cache, it described the previous position
+        return *this;
+      }
+
+      iterator<order> operator++(int)
+      {
+        auto it = *this;
+        operator++();
+        return it;
+      }
+
+      iterator<order>& operator--()
+      {
+        --m_idx;
+        m_entry.reset();  // drop the cache, it described the previous position
+        return *this;
+      }
+
+      iterator<order> operator--(int)
+      {
+        auto it = *this;
+        operator--();
+        return it;
+      }
+
+      const Entry& operator*() const
+      {
+        if (!m_entry) {  // first dereference at this position: build and cache the Entry
+          m_entry.reset(new Entry(m_file, _toPathOrder<order>(*m_file, m_idx)));
+        }
+        return *m_entry;
+      }
+
+      const Entry* operator->() const
+      {
+        operator*();  // makes sure the cached Entry exists
+        return m_entry.get();
+      }
+
+    private:
+      std::shared_ptr<FileImpl> m_file;
+      entry_index_type m_idx;
+      mutable std::unique_ptr<Entry> m_entry;  // mutable: filled in by the const operator*()
+  };
+
+  /**
+   * Bitset sized by `IntegrityCheck::COUNT`: the checks `zim::validate()` has to run.
+   */
+  using IntegrityCheckList = std::bitset<size_t(IntegrityCheck::COUNT)>;
+
+  /** Check the integrity of the zim file.
+   *
+   * Run the specified checks to verify the zim file is valid
+   * (with regard to the zim format). Some checks can be quite slow.
+   *
+   * @param zimPath The path of the ZIM archive to be checked.
+   * @param checksToRun The set of checks to perform.
+   * @return False if any check fails, true otherwise.
+   */
+  bool validate(const std::string& zimPath, IntegrityCheckList checksToRun);
+}
+
+#endif // ZIM_ARCHIVE_H
+
diff --git a/include/zim/blob.h b/include/zim/blob.h
new file mode 100644
index 0000000..2b0662d
--- /dev/null
+++ b/include/zim/blob.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_BLOB_H
+#define ZIM_BLOB_H
+
+#include "zim.h"
+
+#include <iostream>
+#include <string>
+#include <algorithm>
+#include <memory>
+
+namespace zim
+{
+  /**
+   * A blob is a pointer to data, potentially stored in an `Archive`.
+   *
+   * All `Blob`'s methods are threadsafe.
+   */
+  class Blob
+  {
+    public: // types
+      using DataPtr = std::shared_ptr<const char>;
+
+    public: // functions
+      /**
+       * Construct an empty `Blob`.
+       */
+      Blob();
+
+      /**
+       * Construct a `Blob` pointing to `data`.
+       *
+       * The created blob only points to the data and doesn't own it.
+       * The user must make sure the data is not freed while the blob is in use.
+       */
+      Blob(const char* data, size_type size);
+
+      /**
+       * Construct a `Blob` pointing to `buffer`.
+       *
+       * The created blob shares the ownership of the data.
+       */
+      Blob(const DataPtr& buffer, size_type size);
+
+      operator std::string() const { return std::string(_data.get(), _size); }  // copies the bytes
+      const char* data() const  { return _data.get(); }  // first byte
+      const char* end() const   { return _data.get() + _size; }  // one past the last byte
+      size_type size() const     { return _size; }  // size in bytes
+
+   private:
+     DataPtr _data;
+     size_type _size;
+  };
+
+  inline std::ostream& operator<< (std::ostream& out, const Blob& blob)
+  {
+    // Raw, unformatted write of the blob's bytes; a blob with no data writes nothing.
+    if (const char* bytes = blob.data()) out.write(bytes, blob.size());
+    return out;
+  }
+
+  inline bool operator== (const Blob& b1, const Blob& b2)
+  {
+    if (b1.size() != b2.size()) return false;  // different lengths never match
+    return std::equal(b1.data(), b1.end(), b2.data());  // byte-wise comparison
+  }
+}
+
+#endif // ZIM_BLOB_H
diff --git a/include/zim/entry.h b/include/zim/entry.h
new file mode 100644
index 0000000..6944aa6
--- /dev/null
+++ b/include/zim/entry.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ENTRY_H
+#define ZIM_ENTRY_H
+
+#include "zim.h"
+
+#include <string>
+#include <memory>
+
+namespace zim
+{
+  class Item;
+  class Dirent;
+  class FileImpl;
+
+  /**
+   * An entry in an `Archive`.
+   *
+   * All `Entry`'s methods are threadsafe.
+   */
+  class Entry
+  {
+    public:
+      explicit Entry(std::shared_ptr<FileImpl> file_, entry_index_type idx_);
+
+      bool isRedirect() const;  // see getItem()/getRedirect() for what a redirect implies
+      std::string getTitle() const;
+      std::string getPath() const;
+
+      /** Get the item associated to the entry.
+       *
+       * An item is associated only if the entry is not a redirect.
+       * For convenience, if follow is true, return the item associated to the targeted entry.
+       *
+       * @param follow True if the redirection is resolved before getting the item. (false by default)
+       * @return The Item associated to the entry.
+       * @exception InvalidType if the entry is a redirection and follow is false.
+       */
+      Item getItem(bool follow=false) const;
+
+      /** Get the item associated to the target entry.
+       *
+       * If there is a chain of redirection, the whole chain is resolved
+       * and the item associated to the last entry is returned.
+       *
+       * @return the Item associated with the targeted entry.
+       * @exception InvalidType if the entry is not a redirection.
+       */
+      Item getRedirect() const;
+
+      /** Get the Entry targeted by the entry.
+       *
+       * @return The entry directly targeted by this redirect entry.
+       * @exception InvalidType if the entry is not a redirection.
+       */
+      Entry getRedirectEntry() const;
+
+      /** Get the index of the Entry targeted by the entry.
+       *
+       * @return The index of the entry directly targeted by this redirect
+       *         entry.
+       * @exception InvalidType if the entry is not a redirection.
+       */
+      entry_index_type getRedirectEntryIndex() const;
+
+      entry_index_type getIndex() const   { return m_idx; }  // presumably the idx_ given to the constructor
+
+    private:
+      std::shared_ptr<FileImpl> m_file;
+      entry_index_type m_idx;
+      std::shared_ptr<const Dirent> m_dirent;
+  };
+
+}
+
+#endif // ZIM_ENTRY_H
+
diff --git a/include/zim/error.h b/include/zim/error.h
new file mode 100644
index 0000000..48acd09
--- /dev/null
+++ b/include/zim/error.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ERROR_H
+#define ZIM_ERROR_H
+
+#include <stdexcept>
+
+namespace zim
+{
+  class ZimFileFormatError : public std::runtime_error  // error type for an invalid zim file
+  {
+    public:
+      explicit ZimFileFormatError(const std::string& msg)
+        : std::runtime_error(msg)
+        { }
+  };
+
+  class InvalidType: public std::logic_error  // e.g. a redirect-only call made on a non-redirect entry
+  {
+    public:
+      explicit InvalidType(const std::string& msg)
+        : std::logic_error(msg)
+      {}
+  };
+
+  class EntryNotFound : public std::runtime_error  // a requested entry (path, title, metadata, ...) is missing
+  {
+    public:
+      explicit EntryNotFound(const std::string& msg)
+       : std::runtime_error(msg)
+      {}
+  };
+}
+
+#endif // ZIM_ERROR_H
+
diff --git a/include/zim/item.h b/include/zim/item.h
new file mode 100644
index 0000000..765b78a
--- /dev/null
+++ b/include/zim/item.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2021 Veloman Yunkan
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ITEM_H
+#define ZIM_ITEM_H
+
+#include "zim.h"
+#include "blob.h"
+#include <string>
+
+namespace zim
+{
+  class Dirent;
+  class FileImpl;
+
+  /**
+   * An `Item` in an `Archive`
+   *
+   * All `Item`'s methods are threadsafe.
+   */
+  class Item
+  {
+    public: // types
+      typedef std::pair<std::string, offset_type> DirectAccessInfo;  // (filename, offset)
+
+    public: // functions
+      explicit Item(std::shared_ptr<FileImpl> file_, entry_index_type idx_);
+
+      std::string getTitle() const;
+      std::string getPath() const;
+      std::string getMimetype() const;
+
+      /** Get the data associated to the item
+       *
+       * Get the data of the item, starting at offset.
+       *
+       * @param offset The number of bytes to skip at the beginning of the data.
+       * @return A blob corresponding to the data.
+       */
+      Blob getData(offset_type offset=0) const;
+
+      /** Get the data associated to the item
+       *
+       * Get the `size` bytes of data of the item, starting at offset.
+       *
+       * @param offset The number of bytes to skip at the beginning of the data.
+       * @param size The number of bytes to read.
+       * @return A blob corresponding to the data.
+       */
+      Blob getData(offset_type offset, size_type size) const;
+
+      /** The size of the item.
+       *
+       * @return The size (in bytes) of the item.
+       */
+      size_type getSize() const;
+
+      /** Direct access information.
+       *
+       * Some items are stored raw in the zim file.
+       * If possible, this function gives information about which file
+       * to read, and at which offset, to get the data.
+       *
+       * It can be useful as an optimisation when interacting with other systems,
+       * by reopening the file and reading the content while bypassing libzim.
+       *
+       * @return A pair of filename/offset specifying where to read the content.
+       *         If it is not possible to have direct access for this item,
+       *         return a pair of `{"", 0}`
+       */
+      DirectAccessInfo getDirectAccessInformation() const;
+
+      entry_index_type getIndex() const   { return m_idx; }  // presumably the idx_ given to the constructor
+
+#ifdef ZIM_PRIVATE
+      cluster_index_type getClusterIndex() const;
+#endif
+
+    private: // data
+      std::shared_ptr<FileImpl> m_file;
+      entry_index_type m_idx;
+      std::shared_ptr<const Dirent> m_dirent;
+  };
+
+}
+
+#endif // ZIM_ITEM_H
+
diff --git a/include/zim/meson.build b/include/zim/meson.build
new file mode 100644
index 0000000..b8b6c49
--- /dev/null
+++ b/include/zim/meson.build
@@ -0,0 +1,34 @@
+zim_config = configure_file(output : 'zim_config.h',  # generated from public_conf
+                            configuration : public_conf)
+
+install_headers(  # public headers installed unconditionally under 'zim'
+    'archive.h',
+    'blob.h',
+    'error.h',
+    'item.h',
+    'entry.h',
+    'uuid.h',
+    'zim.h',
+    'suggestion.h',
+    'suggestion_iterator.h',
+    'tools.h',
+    'version.h',
+    zim_config,
+    subdir:'zim'
+)
+
+if xapian_dep.found()  # search headers are installed only when xapian_dep is found
+  install_headers(
+    'search.h',
+    'search_iterator.h',
+    subdir:'zim'
+  )
+endif
+
+install_headers(  # writer headers go to the 'zim/writer' subdirectory
+    'writer/item.h',
+    'writer/creator.h',
+    'writer/contentProvider.h',
+    subdir:'zim/writer'
+)
+
diff --git a/include/zim/search.h b/include/zim/search.h
new file mode 100644
index 0000000..2b4cb66
--- /dev/null
+++ b/include/zim/search.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2007 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SEARCH_H
+#define ZIM_SEARCH_H
+
+#include "search_iterator.h"
+#include "archive.h"
+#include <vector>
+#include <string>
+#include <map>
+
+namespace Xapian {
+  class Enquire;
+  class MSet;
+};
+
+namespace zim
+{
+
+class Archive;
+class InternalDataBase;
+class Query;
+class Search;
+class SearchResultSet;
+
+/**
+ * A Searcher is an object performing full-text searches on a set of Archives.
+ *
+ * A Searcher is mainly used to create new `Search` objects.
+ * Internally, this is mainly a wrapper around a Xapian database.
+ *
+ * You should consider that all search operations are NOT threadsafe.
+ * It is up to you to protect your calls to avoid race conditions.
+ * However, Searcher (and subsequent classes) do not maintain a global/shared state.
+ * You can create several Searchers and use them in different threads.
+ */
+class Searcher
+{
+  public:
+    /** Searcher constructor.
+     *
+     * Construct a searcher on top of several archives (multi search).
+     *
+     * @param archives A list (vector) of archives to search on.
+     */
+    explicit Searcher(const std::vector<Archive>& archives);
+
+    /** Searcher constructor.
+     *
+     * Construct a searcher on top of one archive.
+     *
+     * @param archive An archive to search on.
+     */
+    explicit Searcher(const Archive& archive);
+    Searcher(const Searcher& other);
+    Searcher& operator=(const Searcher& other);
+    Searcher(Searcher&& other);
+    Searcher& operator=(Searcher&& other);
+    ~Searcher();
+
+    /** Add an archive to the searcher.
+     *
+     * Adding an archive to a searcher does not invalidate already created searches.
+     */
+    Searcher& addArchive(const Archive& archive);
+
+    /** Create a search for a specific query.
+     *
+     * The search is made on all archives added to the Searcher.
+     *
+     * @param query The Query to search.
+     *
+     * @throws std::runtime_error if the searcher does not have a valid
+     *         FT database.
+     */
+    Search search(const Query& query);
+
+    /** Set the verbosity of search operations.
+     *
+     * @param verbose The verbose mode to set
+     */
+    void setVerbose(bool verbose);
+
+  private: // methods
+    void initDatabase();
+
+  private: // data
+    std::shared_ptr<InternalDataBase> mp_internalDb;
+    std::vector<Archive> m_archives;
+    bool m_verbose;
+};
+
+/**
+ * A Query represents a query.
+ *
+ * It describes what has to be searched and how.
+ * A Query is "database" independent.
+ */
+class Query
+{
+  public:
+    /** Query constructor.
+     *
+     * Create a query; the textual query defaults to the empty string.
+     */
+    Query(const std::string& query = "");
+
+    /** Set the textual query of the Query.
+     *
+     * @param query The string to search for.
+     */
+    Query& setQuery(const std::string& query);
+
+    /** Set the geographical query of the Query.
+     *
+     * Some articles may be geo-positioned.
+     * You can search for articles within a certain distance of a point.
+     *
+     * @param latitude The latitude of the point.
+     * @param longitude The longitude of the point.
+     * @param distance The maximal distance from the point.
+     */
+    Query& setGeorange(float latitude, float longitude, float distance);
+
+    std::string m_query { "" };
+
+    bool m_geoquery { false };
+    float m_latitude { 0 };
+    float m_longitude { 0 };
+    float m_distance { 0 } ;
+};
+
+
+/**
+ * A Search represents a particular search, based on a `Searcher`.
+ *
+ * This is somehow the combination of a `Searcher` (what to search on)
+ * and a `Query` (what to search for).
+ */
+class Search
+{
+    public:
+        Search(Search&& s);
+        Search& operator=(Search&& s);
+        ~Search();
+
+        /** Get a set of results for this search.
+         *
+         * @param start The beginning of the range to get
+         *              (offset of the first result).
+         * @param maxResults The maximum number of results to return
+         *                   (offset of last result from the start of range).
+         */
+        const SearchResultSet getResults(int start, int maxResults) const;
+
+        /** Get the number of estimated results for this search.
+         *
+         * As the name suggests, it is an estimation of the number of results.
+         */
+        int getEstimatedMatches() const;
+
+    private: // methods
+        Search(std::shared_ptr<InternalDataBase> p_internalDb, const Query& query);
+        Xapian::Enquire& getEnquire() const;
+
+    private: // data
+         std::shared_ptr<InternalDataBase> mp_internalDb;
+         mutable std::unique_ptr<Xapian::Enquire> mp_enquire;
+         Query m_query;
+
+  friend class Searcher;
+};
+
+/**
+ * The `SearchResultSet` represents a range of results corresponding to a `Search`.
+ *
+ * It mainly allows to get an iterator.
+ */
+class SearchResultSet
+{
+  public:
+    typedef SearchIterator iterator;
+
+    /** The begin iterator on the result range. */
+    iterator begin() const;
+
+    /** The end iterator on the result range. */
+    iterator end() const;
+
+    /** The size of the SearchResultSet (end()-begin()) */
+    int size() const;
+
+  private:
+    SearchResultSet(std::shared_ptr<InternalDataBase> p_internalDb, Xapian::MSet&& mset);
+    SearchResultSet(std::shared_ptr<InternalDataBase> p_internalDb);
+
+  private: // data
+    std::shared_ptr<InternalDataBase> mp_internalDb;
+    std::shared_ptr<Xapian::MSet> mp_mset;
+  friend class Search;
+};
+
+} //namespace zim
+
+#endif // ZIM_SEARCH_H
diff --git a/include/zim/search_iterator.h b/include/zim/search_iterator.h
new file mode 100644
index 0000000..a8c98f0
--- /dev/null
+++ b/include/zim/search_iterator.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SEARCH_ITERATOR_H
+#define ZIM_SEARCH_ITERATOR_H
+
+#include <memory>
+#include <iterator>
+#include "entry.h"
+#include "archive.h"
+#include "uuid.h"
+
+namespace zim
+{
+class SearchResultSet;
+
+class SearchIterator : public std::iterator<std::bidirectional_iterator_tag, Entry>
+{   // Bidirectional iterator over Entry. NOTE(review): std::iterator is deprecated since C++17.
+    friend class zim::SearchResultSet; // presumably so it can use the private constructor below
+    public:
+        SearchIterator();
+        SearchIterator(const SearchIterator& it);
+        SearchIterator& operator=(const SearchIterator& it);
+        SearchIterator(SearchIterator&& it);
+        SearchIterator& operator=(SearchIterator&& it);
+        ~SearchIterator();
+
+        bool operator== (const SearchIterator& it) const;
+        bool operator!= (const SearchIterator& it) const;
+
+        SearchIterator& operator++();
+        SearchIterator operator++(int);
+        SearchIterator& operator--();
+        SearchIterator operator--(int);
+
+        std::string getPath() const;
+        std::string getTitle() const;
+        int getScore() const;
+        std::string getSnippet() const;
+        int getWordCount() const;
+        int getSize() const;
+        int getFileIndex() const;
+        Uuid getZimId() const;
+        reference operator*() const;  // `reference` is Entry& (inherited typedef)
+        pointer operator->() const;   // `pointer` is Entry* (inherited typedef)
+
+#ifdef ZIM_PRIVATE
+        std::string getDbData() const;  // only declared when ZIM_PRIVATE is defined
+#endif
+
+    private:
+        struct InternalData;  // only forward-declared in this header
+        std::unique_ptr<InternalData> internal;
+        SearchIterator(InternalData* internal_data);
+
+        bool isEnd() const;
+};
+
+} // namespace zim
+
+#endif // ZIM_SEARCH_ITERATOR_H
diff --git a/include/zim/suggestion.h b/include/zim/suggestion.h
new file mode 100644
index 0000000..c8020ad
--- /dev/null
+++ b/include/zim/suggestion.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2007 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SUGGESTION_H
+#define ZIM_SUGGESTION_H
+
+#include "suggestion_iterator.h"
+#include "archive.h"
+
+#if defined(LIBZIM_WITH_XAPIAN)
+namespace Xapian {
+  class Enquire;
+  class MSet;
+};
+#endif
+
+namespace zim
+{
+
+class SuggestionSearcher;
+class SuggestionSearch;
+class SuggestionIterator;
+class SuggestionDataBase;
+
+/**
+ * A SuggestionSearcher is an object suggesting over titles of an Archive.
+ *
+ * A SuggestionSearcher is mainly used to create new `SuggestionSearch` objects.
+ * Internally, this is a wrapper around a SuggestionDataBase which may or may not
+ * include a Xapian index.
+ *
+ * You should consider that all search operations are NOT threadsafe.
+ * It is up to you to protect your calls to avoid race conditions.
+ * However, SuggestionSearcher (and subsequent classes) do not maintain a
+ * global/shared state. You can create several Searchers and use them in different threads.
+ */
+class SuggestionSearcher
+{
+  public:
+    /** SuggestionSearcher constructor.
+     *
+     * Construct a SuggestionSearcher on top of an archive.
+     *
+     * @param archive An archive to suggest on.
+     */
+    explicit SuggestionSearcher(const Archive& archive);
+
+    SuggestionSearcher(const SuggestionSearcher& other);
+    SuggestionSearcher& operator=(const SuggestionSearcher& other);
+    SuggestionSearcher(SuggestionSearcher&& other);
+    SuggestionSearcher& operator=(SuggestionSearcher&& other);
+    ~SuggestionSearcher();
+
+    /** Create a SuggestionSearch for a specific query.
+     *
+     * The search is made on the archive under the SuggestionSearcher.
+     *
+     * @param query The query string to get suggestions for.
+     */
+    SuggestionSearch suggest(const std::string& query);
+
+    /** Set the verbosity of search operations.
+     *
+     * @param verbose The verbose mode to set
+     */
+    void setVerbose(bool verbose);
+
+  private: // methods
+    void initDatabase();
+
+  private: // data
+    std::shared_ptr<SuggestionDataBase> mp_internalDb;
+    Archive m_archive;
+    bool m_verbose;
+};
+
+/**
+ * A SuggestionSearch represents a particular suggestion search, based on a `SuggestionSearcher`.
+ */
+class SuggestionSearch
+{
+    public:
+        SuggestionSearch(SuggestionSearch&& s);
+        SuggestionSearch& operator=(SuggestionSearch&& s);
+        ~SuggestionSearch();
+
+        /** Get a set of results for this search.
+         *
+         * @param start The beginning of the range to get
+         *              (offset of the first result).
+         * @param maxResults The maximum number of results to return
+         *                   (offset of last result from the start of range).
+         */
+        const SuggestionResultSet getResults(int start, int maxResults) const;
+
+        /** Get the number of estimated results for this suggestion search.
+         *
+         * As the name suggests, it is an estimation of the number of results.
+         */
+        int getEstimatedMatches() const;
+
+    private: // methods
+        SuggestionSearch(std::shared_ptr<SuggestionDataBase> p_internalDb, const std::string& query);
+
+    private: // data
+         std::shared_ptr<SuggestionDataBase> mp_internalDb;
+         std::string m_query;
+
+  friend class SuggestionSearcher;
+
+#ifdef ZIM_PRIVATE
+    public:
+        // Close Xapian db to force range based search
+        const void forceRangeSuggestion();  // NOTE(review): 'const' on a void return has no effect
+#endif
+
+// Xapian based methods and data
+#if defined(LIBZIM_WITH_XAPIAN)
+    private: // Xapian based methods
+        Xapian::Enquire& getEnquire() const;
+
+    private: // Xapian based data
+        mutable std::unique_ptr<Xapian::Enquire> mp_enquire;
+#endif  // LIBZIM_WITH_XAPIAN
+};
+
+/**
+ * The `SuggestionResultSet` represents a range of results corresponding to a `SuggestionSearch`.
+ *
+ * It mainly allows to get an iterator either based on an MSetIterator or a RangeIterator.
+ */
+class SuggestionResultSet
+{
+  public:
+    typedef SuggestionIterator iterator;
+    typedef Archive::EntryRange<EntryOrder::titleOrder> EntryRange;
+
+    /** The begin iterator on the result range. */
+    iterator begin() const;
+
+    /** The end iterator on the result range. */
+    iterator end() const;
+
+    /** The size of the SuggestionResultSet (end()-begin()) */
+    int size() const;
+
+  private: // data
+    std::shared_ptr<SuggestionDataBase> mp_internalDb;
+    std::shared_ptr<EntryRange> mp_entryRange;
+
+  private:
+    SuggestionResultSet(EntryRange entryRange);
+
+  friend class SuggestionSearch;
+
+// Xapian based methods and data
+#if defined(LIBZIM_WITH_XAPIAN)
+
+  private: // Xapian based methods
+    SuggestionResultSet(std::shared_ptr<SuggestionDataBase> p_internalDb, Xapian::MSet&& mset);
+
+  private: // Xapian based data
+    std::shared_ptr<Xapian::MSet> mp_mset;
+
+#endif  // LIBZIM_WITH_XAPIAN
+};
+
+} // namespace zim
+
+#endif // ZIM_SUGGESTION_H
diff --git a/include/zim/suggestion_iterator.h b/include/zim/suggestion_iterator.h
new file mode 100644
index 0000000..ec2f890
--- /dev/null
+++ b/include/zim/suggestion_iterator.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SUGGESTION_ITERATOR_H
+#define ZIM_SUGGESTION_ITERATOR_H
+
+#include "archive.h"
+#include <iterator>
+
+namespace zim
+{
+class SuggestionResultSet;
+class SuggestionItem;
+class SearchIterator;
+
+class SuggestionIterator : public std::iterator<std::bidirectional_iterator_tag, SuggestionItem>
+{   // Bidirectional iterator over SuggestionItem. NOTE(review): std::iterator is deprecated since C++17.
+    typedef Archive::iterator<EntryOrder::titleOrder> RangeIterator;
+    friend class SuggestionResultSet; // presumably so it can use the private constructors below
+    public:
+        SuggestionIterator() = delete;  // no default construction
+        SuggestionIterator(const SuggestionIterator& it);
+        SuggestionIterator& operator=(const SuggestionIterator& it);
+        SuggestionIterator(SuggestionIterator&& it);
+        SuggestionIterator& operator=(SuggestionIterator&& it);
+        ~SuggestionIterator();
+
+        bool operator== (const SuggestionIterator& it) const;
+        bool operator!= (const SuggestionIterator& it) const;
+
+        SuggestionIterator& operator++();
+        SuggestionIterator operator++(int);
+        SuggestionIterator& operator--();
+        SuggestionIterator operator--(int);
+
+        Entry getEntry() const;
+
+        const SuggestionItem& operator*();   // non-const member, unlike a typical iterator
+        const SuggestionItem* operator->();  // non-const member, unlike a typical iterator
+
+    private: // data
+        struct SuggestionInternalData;  // only forward-declared in this header
+        std::unique_ptr<RangeIterator> mp_rangeIterator;
+        std::unique_ptr<SuggestionItem> m_suggestionItem;
+
+    private: // methods
+        SuggestionIterator(RangeIterator rangeIterator);
+
+// Xapian based methods and data
+#if defined(LIBZIM_WITH_XAPIAN)
+#ifdef ZIM_PRIVATE
+    public:
+        std::string getDbData() const;
+#endif
+    private: // xapian based data
+        std::unique_ptr<SuggestionInternalData> mp_internal;
+
+    private: // xapian based methods
+        std::string getIndexPath() const;
+        std::string getIndexTitle() const;
+        std::string getIndexSnippet() const;
+        SuggestionIterator(SuggestionInternalData* internal_data);
+#endif  // LIBZIM_WITH_XAPIAN
+};
+
+class SuggestionItem
+{
+    // Holds a title/path/snippet triple; only the friend SuggestionIterator can construct one.
+    public: // accessors, each returning a copy of the stored string
+        std::string getTitle() const { return title; }
+        std::string getPath() const { return path; }
+        std::string getSnippet() const { return snippet; }
+        // True when the stored snippet is not empty.
+        bool hasSnippet() const { return snippet.size() != 0; }
+
+    private: // data
+        std::string title;
+        std::string path;
+        std::string snippet;
+
+    private: // construction (snippet defaults to the empty string)
+        explicit SuggestionItem(std::string title_, std::string path_, std::string snippet_ = "")
+        :   title(title_), path(path_), snippet(snippet_)
+        {}
+
+    friend class SuggestionIterator;
+};
+
+} // namespace zim
+
+#endif // ZIM_SUGGESTION_ITERATOR_H
diff --git a/include/zim/tools.h b/include/zim/tools.h
new file mode 100644
index 0000000..80cb125
--- /dev/null
+++ b/include/zim/tools.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2022 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_TOOLS_H
+#define ZIM_TOOLS_H
+
+#include <zim/zim_config.h>
+#include <string>
+
+namespace zim {
+#if defined(LIBZIM_WITH_XAPIAN)
+  // Same feature macro as the one tested by the other public headers (e.g. suggestion.h).
+  /** Helper function to set the ICU data directory.
+   *
+   * On Android, we compile ICU without data integrated
+   * in the library. So Android applications need to set
+   * the data directory where ICU can find its data.
+   */
+  void setICUDataDirectory(const std::string& path);
+
+#endif
+}
+
+#endif // ZIM_TOOLS_H
diff --git a/include/zim/uuid.h b/include/zim/uuid.h
new file mode 100644
index 0000000..6544eca
--- /dev/null
+++ b/include/zim/uuid.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2021 Mannesh P M <manu.pm55@gmaile.com>
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_UUID_H
+#define ZIM_UUID_H
+
+#include <iosfwd>
+#include <algorithm>
+#include <cstring>
+#include <string>
+
+namespace zim
+{
+  struct Uuid
+  {
+    /** Build a Uuid whose 16 bytes are all zero. */
+    Uuid() { std::fill(data, data + sizeof(data), char(0)); }
+
+    /** Build a Uuid by copying the 16 bytes starting at `uuid`. */
+    Uuid(const char uuid[16]) { std::copy_n(uuid, sizeof(data), data); }
+
+    static Uuid generate(std::string value = "");
+
+    /** Two Uuids compare equal when their 16 bytes are identical. */
+    bool operator== (const Uuid& other) const
+      { return std::memcmp(data, other.data, sizeof(data)) == 0; }
+    bool operator!= (const Uuid& other) const
+      { return !operator==(other); }
+
+    /** Number of bytes held by a Uuid (always 16). */
+    unsigned size() const  { return static_cast<unsigned>(sizeof(data)); }
+
+    explicit operator std::string() const;
+
+    /** Raw bytes of the identifier. */
+    char data[16];
+  };
+
+  std::ostream& operator<< (std::ostream& out, const Uuid& uuid);
+
+}
+
+#endif // ZIM_UUID_H
diff --git a/include/zim/version.h b/include/zim/version.h
new file mode 100644
index 0000000..f94a532
--- /dev/null
+++ b/include/zim/version.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2021 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_VERSION_H
+#define ZIM_VERSION_H
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace zim {
+  typedef std::vector<std::pair<std::string, std::string>> LibVersions;  // list of string pairs
+  LibVersions getVersions();
+  void printVersions(std::ostream& out = std::cout);  // default argument needs <iostream>
+}
+
+#endif // ZIM_VERSION_H
+
diff --git a/include/zim/writer/contentProvider.h b/include/zim/writer/contentProvider.h
new file mode 100644
index 0000000..eadd135
--- /dev/null
+++ b/include/zim/writer/contentProvider.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_CONTENTPROVIDER_H
+#define ZIM_WRITER_CONTENTPROVIDER_H
+
+#include <stdexcept>
+#include <zim/blob.h>
+#include <zim/zim.h>
+#include <string>
+
+namespace zim
+{
+#ifdef _WIN32
+  #define DEFAULTFD zim::windows::FD
+  namespace windows {
+#else
+  #define DEFAULTFD zim::unix::FD
+  namespace unix {
+#endif
+    class FD;
+  }
+  namespace writer
+  {
+    /**
+     * `ContentProvider` is an abstract class in charge of providing to the creator
+     * the content to add in the archive.
+     */
+    class ContentProvider {
+      public:
+        virtual ~ContentProvider() = default;
+        /**
+         * The size of the content to add into the archive.
+         *
+         * @return the total size of the content.
+         */
+        virtual zim::size_type getSize() const = 0;
+
+        /**
+         * Return a blob to add to the archive.
+         *
+         * The returned blob doesn't have to represent the whole content.
+         * The feed method can return the whole content chunk by chunk or in
+         * one step.
+         * When the whole content has been returned, feed must return an empty blob
+         * (size == 0).
+         *
+         * This method will be called several times (at least twice) for
+         * each content to add.
+         *
+         * It is up to the implementation to correctly manage the data pointed to by
+         * the returned blob.
+         * It may (re)use the same buffer between calls (rewriting its content),
+         * create a new buffer each time or make the blob point to a new region of
+         * a big buffer.
+         * It is up to the implementation to free any allocated memory.
+         *
+         * The data pointed to by the blob must stay valid until the next call to feed.
+         * A call to feed ensures that the data returned by a previous call will not
+         * be used anymore.
+         */
+        virtual Blob feed() = 0;
+    };
+
+    /**
+     * StringProvider provides the content stored in a string.
+     */
+    class StringProvider : public ContentProvider {
+      public:
+        /**
+         * Create a provider using a string as content.
+         * The string content is copied, so the referenced string does not have to be kept alive.
+         *
+         * @param content the content to serve.
+         */
+        explicit StringProvider(const std::string& content)
+          : content(content),
+            feeded(false)
+        {}
+        zim::size_type getSize() const override { return content.size(); }
+        Blob feed() override;
+
+      protected:
+        std::string content;
+        bool feeded;
+    };
+
+    /**
+     * SharedStringProvider provides the content stored in a shared string.
+     *
+     * It is mostly the same thing as `StringProvider` but uses a shared_ptr
+     * to avoid a copy.
+     */
+    class SharedStringProvider : public ContentProvider {
+      public:
+        /**
+         * Create a provider using a string as content.
+         * The string content is not copied.
+         *
+         * @param content the content to serve.
+         */
+        explicit SharedStringProvider(std::shared_ptr<const std::string> content)
+          : content(content),
+            feeded(false)
+        {}
+        zim::size_type getSize() const override { return content->size(); }
+        Blob feed() override;
+
+      protected:
+        std::shared_ptr<const std::string> content;
+        bool feeded;
+
+    };
+
+    /**
+     * FileProvider provides the content stored in a file.
+     */
+    class FileProvider : public ContentProvider {
+      public:
+        /**
+         * Create a provider using a file as content.
+         *
+         * @param filepath the path to the file to serve.
+         */
+        explicit FileProvider(const std::string& filepath);
+        ~FileProvider();
+        zim::size_type getSize() const override { return size; }
+        Blob feed() override;
+
+      protected:
+        std::string filepath;
+        zim::size_type size;
+
+      private:
+        std::unique_ptr<char[]> buffer;
+        std::unique_ptr<DEFAULTFD> fd;
+        zim::offset_type offset;
+    };
+
+  }
+}
+
+#undef DEFAULTFD
+
+#endif // ZIM_WRITER_CONTENTPROVIDER_H
diff --git a/include/zim/writer/creator.h b/include/zim/writer/creator.h
new file mode 100644
index 0000000..00f6414
--- /dev/null
+++ b/include/zim/writer/creator.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_CREATOR_H
+#define ZIM_WRITER_CREATOR_H
+
+#include <memory>
+#include <zim/zim.h>
+#include <zim/writer/item.h>
+
+namespace zim
+{
+  class Fileheader;
+  namespace writer
+  {
+    class CreatorData;
+
+    /**
+     * The `Creator` is responsible for creating a zim file.
+     *
+     * Once the `Creator` is instantiated, it can be configured with the
+     * `config*` methods.
+     * Then the creation process must be started with `startZimCreation`.
+     * Elements of the zim file can be added using the `add*` methods.
+     * The final step is to call `finishZimCreation`.
+     *
+     * During the creation of the zim file (and before the call to `finishZimCreation`),
+     * some values must be set using the `set*` methods.
+     *
+     * All `add*` methods can throw a std::runtime_error exception if the entry
+     * cannot be added (mainly because an entry with the same path has already been added).
+     * It is up to the user to catch this exception and handle the error.
+     * The creator is still in a valid state and the creation can continue.
+     */
+    class Creator
+    {
+      public:
+        /**
+         * Creator constructor.
+         *
+         * It takes no argument: verbosity, compression and the other settings
+         * are configured afterwards with the `config*` methods.
+         */
+        Creator();
+        virtual ~Creator();
+
+        /**
+         * Configure the verbosity of the creator
+         *
+         * @param verbose whether the creator prints verbose information.
+         * @return a reference to itself.
+         */
+        Creator& configVerbose(bool verbose);
+
+        /**
+         * Configure the compression algorithm to use.
+         *
+         * @param compression the compression algorithm to use.
+         * @return a reference to itself.
+         */
+        Creator& configCompression(Compression compression);
+
+        /**
+         * Set the size of the created clusters.
+         *
+         * The creator will try to create clusters with an (uncompressed) size
+         * as close as possible to targetSize without exceeding that limit.
+         * If not possible, the only such case being an item larger than targetSize,
+         * a separate cluster will be allocated for that oversized item.
+         *
+         * Be careful with this value.
+         * A bigger value means more content put together, so a better compression ratio.
+         * But it also means that more decompression has to be done when reading a blob.
+         * If you don't know which value to put, don't use this method and let libzim
+         * use the default value.
+         *
+         * @param targetSize The target size of a cluster (in bytes).
+         * @return a reference to itself.
+         */
+        Creator& configClusterSize(zim::size_type targetSize);
+
+        /**
+         * Configure the fulltext indexing feature.
+         *
+         * @param indexing True if we must fulltext index the content.
+         * @param language Language to use for the indexing.
+         * @return a reference to itself.
+         */
+        Creator& configIndexing(bool indexing, const std::string& language);
+
+        /**
+         * Set the number of threads to use for the internal workers.
+         *
+         * @param nbWorkers The number of workers to use.
+         * @return a reference to itself.
+         */
+        Creator& configNbWorkers(unsigned nbWorkers);
+
+        /**
+         * Start the zim creation.
+         *
+         * The creator must have been configured before calling this method.
+         *
+         * @param filepath the path of the zim file to create.
+         */
+        void startZimCreation(const std::string& filepath);
+
+        /**
+         * Add an item to the archive.
+         *
+         * @param item The item to add.
+         */
+        void addItem(std::shared_ptr<Item> item);
+
+        /**
+         * Add a metadata to the archive.
+         *
+         * @param name the name of the metadata
+         * @param content the content of the metadata
+         * @param mimetype the mimetype of the metadata.
+         *                 Only used to detect if the metadata must be compressed or not.
+         */
+        void addMetadata(const std::string& name, const std::string& content, const std::string& mimetype = "text/plain;charset=utf-8");
+
+        /**
+         * Add a metadata to the archive using a contentProvider instead of a plain string.
+         *
+         * @param name the name of the metadata.
+         * @param provider the provider of the content of the metadata.
+         * @param mimetype the mimetype of the metadata.
+         *                 Only used to detect if the metadata must be compressed.
+         */
+        void addMetadata(const std::string& name, std::unique_ptr<ContentProvider> provider, const std::string& mimetype = "text/plain;charset=utf-8");
+
+        /**
+         * Add an illustration to the archive.
+         *
+         * @param size the size (width and height) of the illustration.
+         * @param content the content of the illustration (must be a png content)
+         */
+        void addIllustration(unsigned int size, const std::string& content);
+
+        /**
+         * Add an illustration to the archive.
+         *
+         * @param size the size (width and height) of the illustration.
+         * @param provider the provider of the content of the illustration (must be a png content)
+         */
+        void addIllustration(unsigned int size, std::unique_ptr<ContentProvider> provider);
+
+        /**
+         * Add a redirection to the archive.
+         *
+         * Hints (especially FRONT_ARTICLE) can be used to put the redirection
+         * in the front articles list.
+         * By default, redirections are not front articles.
+         *
+         * @param path the path of the redirection.
+         * @param title the title of the redirection.
+         * @param targetpath the path of the target of the redirection.
+         * @param hints hints associated to the redirection.
+         */
+        void addRedirection(
+            const std::string& path,
+            const std::string& title,
+            const std::string& targetpath,
+            const Hints& hints = Hints());
+
+        /**
+         * Finalize the zim creation.
+         */
+        void finishZimCreation();
+
+        /**
+         * Set the path of the main page.
+         *
+         * @param mainPath The path of the main page.
+         */
+        void setMainPath(const std::string& mainPath) { m_mainPath = mainPath; }
+
+        /**
+         * Set the uuid of the archive.
+         *
+         * @param uuid The uuid of the archive.
+         */
+        void setUuid(const zim::Uuid& uuid) { m_uuid = uuid; }
+
+      private:
+        std::unique_ptr<CreatorData> data;
+
+        // configuration
+        bool m_verbose = false;
+        Compression m_compression = Compression::Zstd;
+        bool m_withIndex = false;
+        size_t m_clusterSize; // no in-class initializer; presumably set by the constructor - TODO confirm
+        std::string m_indexingLanguage;
+        unsigned m_nbWorkers = 4;
+
+        // zim data
+        std::string m_mainPath;
+        Uuid m_uuid = Uuid::generate(); // a generated uuid is used unless setUuid() replaces it
+
+        void fillHeader(Fileheader* header) const;
+        void writeLastParts() const;
+    };
+  }
+
+}
+
+#endif // ZIM_WRITER_CREATOR_H
diff --git a/include/zim/writer/item.h b/include/zim/writer/item.h
new file mode 100644
index 0000000..3530d05
--- /dev/null
+++ b/include/zim/writer/item.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_ITEM_H
+#define ZIM_WRITER_ITEM_H
+
+#include <stdexcept>
+#include <zim/blob.h>
+#include <zim/zim.h>
+#include <zim/uuid.h>
+#include <string>
+
+#include <map>
+
+namespace zim
+{
+  namespace writer
+  {
+    enum HintKeys { // keys of the hints an item can give to the creator (see Item::getHints())
+      COMPRESS, // whether the item content goes in a compressed cluster
+      FRONT_ARTICLE, // whether the item is a front article
+    };
+    using Hints = std::map<HintKeys, uint64_t>; // hint key -> value; the docs treat values as true/false
+
+    class ContentProvider;
+    class IndexData { // abstract interface giving the data to index for an item
+      public:
+        using GeoPosition = std::tuple<bool, double, double>; // presumably (is valid, latitude, longitude) - TODO confirm
+        virtual ~IndexData() = default;
+        virtual bool hasIndexData() const = 0; // false means the item is not indexed
+        virtual std::string getTitle() const = 0;
+        virtual std::string getContent() const = 0;
+        virtual std::string getKeywords() const = 0;
+        virtual uint32_t getWordCount() const = 0;
+        virtual GeoPosition getGeoPosition() const = 0;
+    };
+
+    /**
+     * Item represents data to be added to the archive.
+     *
+     * This is an abstract class the user needs to implement.
+     * libzim provides `BasicItem`, `StringItem` and `FileItem`
+     * to simplify (or avoid) this reimplementation.
+     */
+    class Item
+    {
+      public:
+        /**
+         * The path of the item.
+         *
+         * The path must be absolute.
+         * The path must be unique.
+         *
+         * @return the path of the item.
+         */
+        virtual std::string getPath() const = 0;
+
+        /**
+         * The title of the item.
+         *
+         * Item's title is indexed and is used for the suggestion system.
+         * Titles don't have to be unique.
+         *
+         * @return the title of the item.
+         */
+        virtual std::string getTitle() const = 0;
+
+        /**
+         * The mimetype of the item.
+         *
+         * Mimetype is stored within the content.
+         * It is also used to detect if the content must be compressed or not.
+         *
+         * @return the mimetype of the item.
+         */
+        virtual std::string getMimeType() const = 0;
+
+        /**
+         * The content provider of the item.
+         *
+         * The content provider is responsible for providing the content to the creator.
+         * The returned content provider must stay valid even after the creator releases
+         * its reference to the item.
+         *
+         * This method will be called once by libzim, in the main thread
+         * (but the provider will be used in a different thread).
+         * The default IndexData will also call this method once (more)
+         * in the main thread (and use it in another thread).
+         *
+         * @return the contentProvider of the item.
+         */
+        virtual std::unique_ptr<ContentProvider> getContentProvider() const = 0;
+
+        /**
+         * The index data of the item.
+         *
+         * The index data is the data to index. (May be different from the content
+         * to store).
+         * The returned index data must stay valid even after the creator releases
+         * its reference to the item.
+         * This method will be called once by libzim if it is compiled with xapian
+         * (and is configured to index data).
+         *
+         * The returned IndexData will be used as source to index the item.
+         * If you don't want the item to be indexed, you can return a nullptr here
+         * or return a valid IndexData pointer which will return false to `hasIndexData`.
+         *
+         * If you don't implement this method, a default implementation will be used.
+         * The default implementation first checks for the mimetype and if the mimetype
+         * contains `text/html` it will use a contentProvider to get the content to index.
+         * The contentProvider will be created in the main thread but the data reading and
+         * parsing will occur in a different thread.
+         *
+         * All methods of `IndexData` will be called in a different thread (the same one for all of them).
+         *
+         * @return the indexData of the item.
+         *         May return a nullptr if there is no indexData.
+         */
+        virtual std::shared_ptr<IndexData> getIndexData() const;
+
+        /**
+         * Hints to help the creator take decisions about the item.
+         *
+         * For now two hints are supported:
+         * - COMPRESS: Can be used to force the creator to put the item content
+         *   in a compressed cluster (if true) or not (if false).
+         *   If the hint is not provided, the decision is taken based on the
+         *   mimetype (textual or binary content ?)
+         * - FRONT_ARTICLE: Can (Should) be used to specify if the item is
+         *   a front article or not.
+         *   If the hint is not provided, the decision is taken based on the
+         *   mimetype (html or not ?)
+         *
+         * @return A list of hints.
+         */
+        virtual Hints getHints() const;
+
+        /**
+         * Returns the getHints() amended with default values based on mimetypes.
+         */
+        Hints getAmendedHints() const;
+        virtual ~Item() = default;
+    };
+
+    /**
+     * A BasicItem is a partial implementation of an Item.
+     *
+     * `BasicItem` provides a basic implementation for everything about an `Item`
+     * but the actual content of the item.
+     */
+    class BasicItem : public Item
+    {
+      public:
+        /**
+         * Create a BasicItem with the given path, mimetype, title and hints.
+         * @param path the path of the item.
+         * @param mimetype the mimetype of the item.
+         * @param title the title of the item.
+         * @param hints the hints returned by `getHints()`.
+         */
+        BasicItem(const std::string& path, const std::string& mimetype, const std::string& title, Hints hints)
+          : path(path),
+            mimetype(mimetype),
+            title(title),
+            hints(hints)
+        {}
+
+        std::string getPath() const override { return path; }
+        std::string getTitle() const override { return title; }
+        std::string getMimeType() const override { return mimetype; }
+        Hints       getHints() const override { return hints; }
+
+      protected:
+        std::string path;
+        std::string mimetype;
+        std::string title;
+        Hints hints;
+    };
+
+    /**
+     * A `StringItem` is a fully implemented item where the content is stored in a string.
+     */
+    class StringItem : public BasicItem, public std::enable_shared_from_this<StringItem>
+    {
+      public:
+        /**
+         * Create a StringItem with the given path, mimetype, title, hints and content.
+         *
+         * The parameters are forwarded, in this order, to the private constructor:
+         * @param path the path of the item.
+         * @param mimetype the mimetype of the item.
+         * @param title the title of the item.
+         * @param hints the hints of the item.
+         * @param content the content of the item.
+         */
+        template<typename... Ts>
+        static std::shared_ptr<StringItem> create(Ts&&... params) {
+          return std::shared_ptr<StringItem>(new StringItem(std::forward<Ts>(params)...));
+        }
+
+        std::unique_ptr<ContentProvider> getContentProvider() const override;
+
+      protected:
+        std::string content;
+
+      private:
+        StringItem(const std::string& path, const std::string& mimetype,
+                   const std::string& title, Hints hints, const std::string& content)
+          : BasicItem(path, mimetype, title, hints),
+            content(content)
+        {}
+
+
+
+    };
+
+    /**
+     * A `FileItem` is a fully implemented item where the content is a file.
+     */
+    class FileItem : public BasicItem
+    {
+      public:
+        /**
+         * Create a FileItem with the given path, mimetype, title, hints and filepath.
+         * @param path the path of the item.
+         * @param mimetype the mimetype of the item.
+         * @param title the title of the item.
+         * @param hints the hints of the item.
+         * @param filepath the path of the file in the filesystem.
+         */
+        FileItem(const std::string& path, const std::string& mimetype,
+                 const std::string& title, Hints hints, const std::string& filepath)
+          : BasicItem(path, mimetype, title, hints),
+            filepath(filepath)
+        {}
+
+        std::unique_ptr<ContentProvider> getContentProvider() const override;
+
+      protected:
+        std::string filepath;
+    };
+
+  }
+}
+
+#endif // ZIM_WRITER_ITEM_H
diff --git a/include/zim/zim.h b/include/zim/zim.h
new file mode 100644
index 0000000..7cd8984
--- /dev/null
+++ b/include/zim/zim.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ZIM_H
+#define ZIM_ZIM_H
+
+#include <cstdint>
+
+#ifdef __GNUC__
+#define DEPRECATED __attribute__((deprecated))
+#elif defined(_MSC_VER)
+#define DEPRECATED __declspec(deprecated)
+#else // unknown compiler: emit a build-time message and make DEPRECATED a no-op
+#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
+#define DEPRECATED
+#endif
+
+
+#include <zim/zim_config.h>
+
+namespace zim
+{
+  // An index of an entry (in a zim file)
+  typedef uint32_t entry_index_type;
+
+  // An index of a cluster (in a zim file)
+  typedef uint32_t cluster_index_type;
+
+  // An index of a blob (in a cluster)
+  typedef uint32_t blob_index_type;
+
+  // The size of something (entry, zim, cluster, blob, ...)
+  typedef uint64_t size_type;
+
+  // An offset.
+  typedef uint64_t offset_type;
+
+  enum class Compression
+  {
+    None = 1,
+
+    // intermediate values correspond to compression
+    // methods that are no longer supported
+
+    Zstd = 5
+  };
+
+  static const char MimeHtmlTemplate[] = "text/x-zim-htmltemplate";
+
+  /**
+   * Various types of integrity checks performed by `zim::validate()`.
+   */
+  enum class IntegrityCheck
+  {
+    /**
+     * Validates the checksum of the ZIM file.
+     */
+    CHECKSUM,
+
+    /**
+     * Checks that offsets in UrlPtrList are valid.
+     */
+    DIRENT_PTRS,
+
+    /**
+     * Checks that dirents are properly sorted.
+     */
+    DIRENT_ORDER,
+
+    /**
+     * Checks that entries in the title index are valid and properly sorted.
+     */
+    TITLE_INDEX,
+
+    /**
+     * Checks that offsets in ClusterPtrList are valid.
+     */
+    CLUSTER_PTRS,
+
+    /**
+     * Checks that mime-type values in dirents are valid.
+     */
+    DIRENT_MIMETYPES,
+
+    ////////////////////////////////////////////////////////////////////////////
+    // End of integrity check types.
+    // COUNT must be the last one and denotes the count of all checks
+    ////////////////////////////////////////////////////////////////////////////
+
+    /**
+     * `COUNT` is not a valid integrity check type. It exists to tell the
+     * number of all supported integrity checks.
+     */
+    COUNT
+  };
+}
+
+#endif // ZIM_ZIM_H
+
diff --git a/meson.build b/meson.build
new file mode 100644
index 0000000..8e2c1bc
--- /dev/null
+++ b/meson.build
@@ -0,0 +1,101 @@
+project('libzim', ['c', 'cpp'],
+  version : '8.0.0',
+  license : 'GPL2',
+  default_options : ['c_std=c11', 'cpp_std=c++11'])
+
+if build_machine.system() != 'windows'  # NOTE(review): tests build_machine, not host_machine - confirm for cross builds
+  add_project_arguments('-D_LARGEFILE64_SOURCE=1', '-D_FILE_OFFSET_BITS=64', language: 'cpp')
+endif
+
+cpp = meson.get_compiler('cpp')
+sizeof_off_t = cpp.sizeof('off_t')
+sizeof_size_t = cpp.sizeof('size_t')
+
+private_conf = configuration_data()
+public_conf = configuration_data()
+
+private_conf.set('VERSION', '"@0@"'.format(meson.project_version()))
+public_conf.set('LIBZIM_VERSION', '"@0@"'.format(meson.project_version()))
+private_conf.set('DIRENT_CACHE_SIZE', get_option('DIRENT_CACHE_SIZE'))
+private_conf.set('DIRENT_LOOKUP_CACHE_SIZE', get_option('DIRENT_LOOKUP_CACHE_SIZE'))
+private_conf.set('CLUSTER_CACHE_SIZE', get_option('CLUSTER_CACHE_SIZE'))
+private_conf.set('LZMA_MEMORY_SIZE', get_option('LZMA_MEMORY_SIZE'))
+private_conf.set10('MMAP_SUPPORT_64', sizeof_off_t==8)  # 1 when off_t is 8 bytes, 0 otherwise
+private_conf.set10('ENV64BIT', sizeof_size_t==8)
+private_conf.set10('ENV32BIT', sizeof_size_t==4)
+if host_machine.system() == 'windows'
+    private_conf.set('ENABLE_USE_MMAP', false)  # the USE_MMAP option is ignored on windows
+    add_project_arguments('-DNOMINMAX', language: 'cpp')
+else
+    private_conf.set('ENABLE_USE_MMAP', get_option('USE_MMAP'))
+endif
+private_conf.set('ENABLE_USE_BUFFER_HEADER', get_option('USE_BUFFER_HEADER'))
+
+static_linkage = get_option('static-linkage')
+static_linkage = static_linkage or get_option('default_library')=='static'  # a static libzim also links its dependencies statically
+
+lzma_dep = dependency('liblzma', static:static_linkage)
+if static_linkage
+  add_project_arguments('-DLZMA_API_STATIC', language: 'cpp')
+endif
+
+zstd_dep = dependency('libzstd', static:static_linkage)
+
+if host_machine.system() == 'freebsd'
+    execinfo_dep = cpp.find_library('execinfo')  # only defined on freebsd
+endif
+
+if get_option('with_xapian')
+    xapian_dep = dependency('xapian-core', static:static_linkage)
+else
+    xapian_dep = dependency('', required:false)  # empty name: a not-found placeholder dependency
+endif
+private_conf.set('ENABLE_XAPIAN', xapian_dep.found())
+public_conf.set('LIBZIM_WITH_XAPIAN', xapian_dep.found())
+
+pkg_requires = ['liblzma', 'libzstd']
+if build_machine.system() == 'windows'  # NOTE(review): tests build_machine, not host_machine - confirm for cross builds
+    extra_link_args = ['-lRpcrt4', '-lWs2_32', '-lwinmm', '-licuuc', '-licuin']
+    extra_cpp_args = ['-DSORTPP_PASS']
+else
+    extra_link_args = []
+    extra_cpp_args = []
+endif
+
+compiler = meson.get_compiler('cpp')  # same compiler object as `cpp` above
+if (compiler.get_id() == 'gcc' and build_machine.system() == 'linux') or host_machine.system() == 'freebsd'
+  # C++ std::thread is implemented using pthread on linux by gcc
+  thread_dep = dependency('threads')
+else
+  thread_dep = dependency('', required:false)
+endif
+
+if xapian_dep.found()
+    pkg_requires += ['xapian-core']
+    icu_dep = dependency('icu-i18n', static:static_linkage)
+    pkg_requires += ['icu-i18n']
+else
+    icu_dep = dependency('icu-i18n', required:false, static:static_linkage)  # icu is optional without xapian
+endif
+
+gtest_dep = dependency('gtest', main:true, fallback:['gtest', 'gtest_main_dep'], required:false)
+
+inc = include_directories('include')
+
+subdir('include')
+subdir('scripts')
+subdir('static')
+subdir('src')
+subdir('examples')
+subdir('test')
+if get_option('doc')
+  subdir('docs')
+endif
+
+pkg_mod = import('pkgconfig')
+pkg_mod.generate(libraries : libzim,
+                 version : meson.project_version(),
+                 name : 'libzim',
+                 filebase : 'libzim',
+                 description : 'A Library to read/write ZIM files.',
+                 requires : pkg_requires)
diff --git a/meson_options.txt b/meson_options.txt
new file mode 100644
index 0000000..84242ed
--- /dev/null
+++ b/meson_options.txt
@@ -0,0 +1,22 @@
+option('CLUSTER_CACHE_SIZE', type : 'string', value : '16',
+  description : 'set cluster cache size to number (default:16)')
+option('DIRENT_CACHE_SIZE', type : 'string', value : '512',
+  description : 'set dirent cache size to number (default:512)')
+option('DIRENT_LOOKUP_CACHE_SIZE', type : 'string', value : '1024',
+  description : 'set dirent lookup cache size to number (default:1024)')
+option('LZMA_MEMORY_SIZE', type : 'string', value : '128',
+  description : 'set lzma uncompress memory in MB (default:128)')
+option('USE_MMAP', type: 'boolean', value: true,
+  description: 'Use mmap to avoid copy from file. (default:true, always false on windows)')
+option('USE_BUFFER_HEADER', type: 'boolean', value: true,
+  description: '''Copy (or use mmap) header index buffers. (default:true)
+Header index are used to access articles, having them in memory can improve access speed but on low memory devices it may use to many memory.
+If false, we directly read the index in the file at each article access.''')
+option('static-linkage', type : 'boolean', value : false,
+  description : 'Link statically with the dependencies.')
+option('doc', type : 'boolean', value : false,
+  description : 'Build the documentations.')
+option('with_xapian', type : 'boolean', value: true,
+  description: 'Build libzim with xapian support')
+option('test_data_dir', type : 'string', value: '',
+  description: 'Where the test data are. If not set, meson will use a internal directory in  the build dir. If you want to download the data in the specified directory you can use `meson download_test_data`. As a special value, you can pass `none` to deactivate test using external test data.')
diff --git a/scripts/download_test_data.py b/scripts/download_test_data.py
new file mode 100755
index 0000000..d320fb3
--- /dev/null
+++ b/scripts/download_test_data.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+
+'''
+Copyright 2021 Matthieu Gautier <mgautier@kymeria.fr>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or any
+later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+'''
+
+import argparse
+from pathlib import Path
+from urllib import request
+from urllib.error import *
+import tarfile
+import sys
+
+TEST_DATA_VERSION = "0.3"
+ARCHIVE_URL_TEMPL = "https://github.com/openzim/zim-testing-suite/releases/download/v{version}/zim-testing-suite-{version}.tar.gz"
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--version', '-v',
+                        help="The version to download.",
+                        default=TEST_DATA_VERSION)
+    parser.add_argument('--remove-top-dir',
+                        help="Remove the top directory when extracting",
+                        action='store_true')
+    parser.add_argument('outdir',
+                        help='The directory where to install the test data.')
+    args = parser.parse_args()
+
+    test_data_url = ARCHIVE_URL_TEMPL.format(version=args.version)
+
+    try:
+        with request.urlopen(test_data_url) as f:
+            with tarfile.open(fileobj=f, mode="r|*") as archive:
+                while True:
+                    member = archive.next()
+                    if member is None:
+                        break
+                    if args.remove_top_dir:
+                        member.name = '/'.join(member.name.split('/')[1:])
+                    archive.extract(member, path=args.outdir)
+
+    except HTTPError as e:
+        print("Error downloading archive at url : {}".format(test_data_url))
+        print(e)
+        sys.exit(1)
+    except OSError as e:
+        print("Error writing the test data on the file system.")
+        print(e)
+        sys.exit(1)
diff --git a/scripts/libzim-compile-resources b/scripts/libzim-compile-resources
new file mode 100755
index 0000000..e4993ba
--- /dev/null
+++ b/scripts/libzim-compile-resources
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+
+'''
+Copyright 2016 Matthieu Gautier <mgautier@kymeria.fr>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3 of the License, or any
+later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+'''
+
+import argparse
+import os.path
+import re
+
+def full_identifier(filename):
+    parts = os.path.normpath(filename).split(os.sep)
+    parts = [to_identifier(part) for part in parts]
+    print(filename, parts)
+    return parts
+
+def to_identifier(name):
+    ident = re.sub(r'[^0-9a-zA-Z]', '_', name)
+    if ident[0].isnumeric():
+        return "_"+ident
+    return ident
+# C++ definition of one resource: its byte array and the string built from it by init_resource().
+resource_impl_template = """
+static const unsigned char {data_identifier}[] = {{
+    {resource_content}
+}};
+
+namespace RESOURCE {{
+{namespaces_open}
+const std::string {identifier} = init_resource("{env_identifier}", {data_identifier}, {resource_len});
+{namespaces_close}
+}}
+"""
+# Branch of the generated lookup function returning the resource registered as {common_name}.
+resource_getter_template = """
+    if (name == "{common_name}")
+        return RESOURCE::{identifier};
+"""
+# Header-side extern declaration of the resource string, wrapped in its namespaces.
+resource_decl_template = """{namespaces_open}
+extern const std::string {identifier};
+{namespaces_close}"""
+
+class Resource:
+    def __init__(self, base_dirs, filename):
+        filename = filename.strip()
+        self.filename = filename
+        self.identifier = full_identifier(filename)
+        found = False
+        for base_dir in base_dirs:
+            try:
+                with open(os.path.join(base_dir, filename), 'rb') as f:
+                    self.data = f.read()
+                found = True
+                break
+            except FileNotFoundError:
+                continue
+        if not found:
+            raise Exception("Impossible to found {}".format(filename))
+
+    def dump_impl(self):
+        nb_row = len(self.data)//16 + (1 if len(self.data) % 16 else 0)
+        sliced = (self.data[i*16:(i+1)*16] for i in range(nb_row))
+
+        return resource_impl_template.format(
+            data_identifier="_".join([""]+self.identifier),
+            resource_content=",\n    ".join(", ".join("{:#04x}".format(i) for i in r) for r in sliced),
+            resource_len=len(self.data),
+            namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]), 
+            namespaces_close=" ".join(["}"]*(len(self.identifier)-1)),
+            identifier=self.identifier[-1],
+            env_identifier="RES_"+"_".join(self.identifier)+"_PATH"
+        )
+    
+    def dump_getter(self):
+        return resource_getter_template.format(
+            common_name=self.filename,
+            identifier="::".join(self.identifier)
+        )
+
+    def dump_decl(self):
+        return resource_decl_template.format(
+            namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]), 
+            namespaces_close=" ".join(["}"]*(len(self.identifier)-1)),
+            identifier=self.identifier[-1]
+        )
+    
+
+
+master_c_template = """//This file is automaically generated. Do not modify it.
+
+#include <stdlib.h>
+#include <fstream>
+#include "{include_file}"
+
+static std::string init_resource(const char* name, const unsigned char* content, int len)
+{{
+    char * resPath = getenv(name);
+    if (NULL == resPath)
+        return std::string(reinterpret_cast<const char*>(content), len);
+    
+    std::ifstream ifs(resPath);
+    if (!ifs.good())
+        return std::string(reinterpret_cast<const char*>(content), len);
+    return std::string( (std::istreambuf_iterator<char>(ifs)),
+                        (std::istreambuf_iterator<char>()   ));
+}}
+
+const std::string& getResource_{basename}(const std::string& name) {{
+{RESOURCES_GETTER}
+    throw ResourceNotFound("Resource not found.");
+}}
+
+{RESOURCES}
+
+"""
+
+def gen_c_file(resources, basename):
+    return master_c_template.format(
+       RESOURCES="\n\n".join(r.dump_impl() for r in resources),
+       RESOURCES_GETTER="\n\n".join(r.dump_getter() for r in resources),
+       include_file=basename,
+       basename=to_identifier(basename)
+    )
+ 
+# Skeleton of the generated header: the RESOURCE declarations, the ResourceNotFound
+# exception and the getResource_<basename>() lookup with its getResource() macro.
+master_h_template = """//This file is automaically generated. Do not modify it.
+#ifndef KIWIX_{BASENAME}
+#define KIWIX_{BASENAME}
+
+#include <string>
+#include <stdexcept>
+
+namespace RESOURCE {{
+    {RESOURCES}
+}};
+
+class ResourceNotFound : public std::runtime_error {{
+  public:
+    ResourceNotFound(const std::string& what_arg):
+      std::runtime_error(what_arg)
+    {{ }};
+}};
+
+const std::string& getResource_{basename}(const std::string& name);
+
+#define getResource(a) (getResource_{basename}(a))
+
+#endif // KIWIX_{BASENAME}
+
+"""
+
+def gen_h_file(resources, basename):
+    return master_h_template.format(
+       RESOURCES="\n    ".join(r.dump_decl() for r in resources),
+       BASENAME=basename.upper(),
+       basename=basename,
+    )
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--cxxfile',
+                        help='The Cpp file name to generate')
+    parser.add_argument('--hfile',
+                        help='The h file name to generate')
+    parser.add_argument('--source_dir',
+                        help="Additional directory where to look for resources.",
+                        action='append')
+    parser.add_argument('resource_file',
+                        help='The list of resources to compile.')
+    args = parser.parse_args()
+
+    base_dir = os.path.dirname(os.path.realpath(args.resource_file))
+    source_dir = args.source_dir or []
+    with open(args.resource_file, 'r') as f:
+        resources = [Resource([base_dir]+source_dir, filename)
+                        for filename in f.readlines()]
+
+    h_identifier = to_identifier(os.path.basename(args.hfile))
+    with open(args.hfile, 'w') as f:
+        f.write(gen_h_file(resources, h_identifier))
+
+    with open(args.cxxfile, 'w') as f:
+        f.write(gen_c_file(resources, os.path.basename(args.hfile)))
+
diff --git a/scripts/meson.build b/scripts/meson.build
new file mode 100644
index 0000000..7e215a9
--- /dev/null
+++ b/scripts/meson.build
@@ -0,0 +1,3 @@
+
+res_compiler = find_program('libzim-compile-resources')
+test_data_downloader = find_program('download_test_data.py')
diff --git a/src/_dirent.h b/src/_dirent.h
new file mode 100644
index 0000000..907e9e1
--- /dev/null
+++ b/src/_dirent.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yankan
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_DIRENT_H
+#define ZIM_DIRENT_H
+
+#include <string>
+#include <zim/zim.h>
+#include <exception>
+#include <memory>
+
+#include "zim_types.h"
+#include "debug.h"
+
+namespace zim
+{
+  class Buffer;
+  class InvalidSize : public std::exception {};
+
+  // In-memory directory entry: either an item located by a cluster/blob
+  // pair or a redirect to another entry, as told by the mimeType field.
+  class Dirent
+  {
+    protected:
+      uint16_t mimeType;
+      uint32_t version;
+
+      // Content location; only used when the dirent is not a redirect.
+      cluster_index_t clusterNumber;
+      blob_index_t blobNumber;
+
+      // Target entry; only used when the dirent is a redirect.
+      entry_index_t redirectIndex;
+
+      char ns;
+      std::string title;
+      std::string url;
+      std::string parameter;
+
+    public:
+      // Reserved values of the mimeType field marking special dirents.
+      static const uint16_t redirectMimeType = 0xffff;
+      static const uint16_t linktargetMimeType = 0xfffe;
+      static const uint16_t deletedMimeType = 0xfffd;
+
+      Dirent()
+        : mimeType(0), version(0),
+          clusterNumber(0), blobNumber(0),
+          redirectIndex(0),
+          ns('\0')
+      {}
+
+      bool isRedirect() const   { return redirectMimeType == mimeType; }
+      bool isLinktarget() const { return linktargetMimeType == mimeType; }
+      bool isDeleted() const    { return deletedMimeType == mimeType; }
+      // An article is whatever is none of the three special kinds.
+      bool isArticle() const    { return !(isRedirect() || isLinktarget() || isDeleted()); }
+      uint16_t getMimeType() const { return mimeType; }
+
+      uint32_t getVersion() const { return version; }
+      void setVersion(uint32_t v) { version = v; }
+
+      // The location getters yield 0 for a redirect; getRedirectIndex()
+      // yields 0 for everything else.
+      cluster_index_t getClusterNumber() const { return !isRedirect() ? clusterNumber : cluster_index_t(0); }
+      blob_index_t getBlobNumber() const { return !isRedirect() ? blobNumber : blob_index_t(0); }
+      entry_index_t getRedirectIndex() const { return !isRedirect() ? entry_index_t(0) : redirectIndex; }
+
+      char getNamespace() const { return ns; }
+      // The url doubles as the title as long as no title has been set.
+      const std::string& getTitle() const { return !title.empty() ? title : url; }
+      const std::string& getUrl() const { return url; }
+      std::string getLongUrl() const;
+      const std::string& getParameter() const { return parameter; }
+
+      // Size accounting: 12 bytes for a redirect or 16 otherwise, 2 more bytes,
+      // the url, the parameter, and the title when it differs from the url.
+      size_t getDirentSize() const
+      {
+        const size_t fixedPart = isRedirect() ? 12 : 16;
+        const size_t titlePart = (title == url) ? 0 : title.size();
+        return fixedPart + url.size() + parameter.size() + 2 + titlePart;
+      }
+
+      void setTitle(const std::string& title_) { title = title_; }
+
+      void setUrl(char ns_, const std::string& url_)
+      {
+        url = url_;
+        ns = ns_;
+      }
+
+      void setParameter(const std::string& parameter_) { parameter = parameter_; }
+
+      void setRedirect(entry_index_t idx)
+      {
+        mimeType = redirectMimeType;
+        redirectIndex = idx;
+      }
+
+      void setItem(uint16_t mimeType_, cluster_index_t clusterNumber_, blob_index_t blobNumber_)
+      {
+        blobNumber = blobNumber_;
+        clusterNumber = clusterNumber_;
+        mimeType = mimeType_;
+      }
+  };
+}
+
+#endif // ZIM_DIRENT_H
diff --git a/src/archive.cpp b/src/archive.cpp
new file mode 100644
index 0000000..1c9e32c
--- /dev/null
+++ b/src/archive.cpp
@@ -0,0 +1,528 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/entry.h>
+#include <zim/item.h>
+#include <zim/error.h>
+#include "fileimpl.h"
+#include "tools.h"
+#include "log.h"
+
+log_define("zim.archive")
+
+namespace zim
+{
+  // Build the archive on a FileImpl created from the file name.
+  Archive::Archive(const std::string& fname)
+    : m_impl(new FileImpl(fname)) {}
+
+#ifndef _WIN32
+  // Build the archive on a FileImpl created from a file descriptor.
+  Archive::Archive(int fd)
+    : m_impl(new FileImpl(fd)) {}
+
+  // Same, forwarding `offset` and `size` to FileImpl as offset_t/zsize_t.
+  Archive::Archive(int fd, offset_type offset, size_type size)
+    : m_impl(new FileImpl(fd, offset_t(offset), zsize_t(size))) {}
+#endif
+
+  // File name as reported by the underlying FileImpl.
+  const std::string& Archive::getFilename() const {
+    return m_impl->getFilename();
+  }
+
+  // File size as reported by the underlying FileImpl.
+  size_type Archive::getFilesize() const {
+    return m_impl->getFilesize().v;
+  }
+
+  // Entry count taken from FileImpl::getCountArticles().
+  entry_index_type Archive::getAllEntryCount() const {
+    return m_impl->getCountArticles().v;
+  }
+
+  // Entry count taken from FileImpl::getUserEntryCount().
+  entry_index_type Archive::getEntryCount() const {
+    return m_impl->getUserEntryCount().v;
+  }
+
+  // Front-entry count when a front articles index exists, else the entry
+  // count of namespace 'C' (new namespace scheme) or 'A' (old scheme).
+  entry_index_type Archive::getArticleCount() const
+  {
+    if (m_impl->hasFrontArticlesIndex()) {
+      return m_impl->getFrontEntryCount().v;
+    }
+    const char ns = m_impl->hasNewNamespaceScheme() ? 'C' : 'A';
+    return m_impl->getNamespaceEntryCount(ns).v;
+  }
+
+  // Uuid recorded in the file header.
+  Uuid Archive::getUuid() const {
+    return m_impl->getFileheader().getUuid();
+  }
+
+  // Item of the 'M' namespace entry called `name`, obtained with
+  // getItem(true); throws EntryNotFound when there is no such entry.
+  Item Archive::getMetadataItem(const std::string& name) const
+  {
+    auto found = m_impl->findx('M', name);
+    if (!found.first) {
+      throw EntryNotFound("Cannot find metadata");
+    }
+    return Entry(m_impl, entry_index_type(found.second)).getItem(true);
+  }
+
+  // Data of the metadata `name` (see getMetadataItem for the error case).
+  std::string Archive::getMetadata(const std::string& name) const {
+    return getMetadataItem(name).getData();
+  }
+
+  // Urls of all the dirents of the 'M' namespace, in index order.
+  std::vector<std::string> Archive::getMetadataKeys() const {
+    std::vector<std::string> keys;
+    const auto begin = m_impl->getNamespaceBeginOffset('M');
+    const auto end = m_impl->getNamespaceEndOffset('M');
+    for (auto idx = begin; idx != end; idx++) {
+      keys.push_back(m_impl->getDirent(idx)->getUrl());
+    }
+    return keys;
+  }
+
+  // Look for a favicon entry, trying "favicon" then "favicon.png" in
+  // namespace '-' and then in 'I'; throws EntryNotFound when all four miss.
+  zim::FileImpl::FindxResult findFavicon(FileImpl& impl)
+  {
+    for (const char ns : {'-', 'I'}) {
+      for (const char* path : {"favicon", "favicon.png"}) {
+        auto found = impl.findx(ns, path);
+        if (found.first) return found;
+      }
+    }
+    throw EntryNotFound("No favicon found.");
+  }
+
+  // Item of the metadata entry "Illustration_<size>x<size>@1"; a request
+  // for size 48 falls back to the favicon of older archives.
+  Item Archive::getIllustrationItem(unsigned int size) const {
+    std::ostringstream name;
+    name << "Illustration_" << size << "x" << size << "@" << 1;
+    auto exact = m_impl->findx('M', name.str());
+    if (exact.first) {
+      return getEntryByPath(entry_index_type(exact.second)).getItem();
+    }
+#if 0
+    // Fallback on the first illustration found, whatever its resolution.
+    // We have decided to not implement it for now; kept for reference.
+    exact = m_impl->findx('M', "Illustration");
+    auto entry = getEntryByPath(entry_index_type(exact.second));
+    if (entry.getPath().find("Illustration") == 0) {
+      return entry.getItem();
+    }
+#endif
+    if (size != 48) {
+      throw EntryNotFound("Cannot find illustration item.");
+    }
+    // For 48x48 illustration, return favicon for older zims.
+    auto favicon = findFavicon(*m_impl);
+    return getEntryByPath(entry_index_type(favicon.second)).getItem(true);
+  }
+
+  // Sizes parsed from the "Illustration_*" entries walked from the position
+  // given by findx('M', "Illustration_"), plus 48 when a favicon can serve it.
+  std::set<unsigned int> Archive::getIllustrationSizes() const {
+    std::set<unsigned int> sizes;
+    auto idx = m_impl->findx('M', "Illustration_").second;
+    while (true) {
+      try {
+        const auto path = getEntryByPath(entry_index_type(idx)).getPath();
+        if (path.find("Illustration_") != 0) {
+          break;  // walked past the last illustration entry
+        }
+        try {
+          sizes.insert(parseIllustrationPathToSize(path));
+        } catch (...) {}  // paths that cannot be parsed are not reported
+      } catch (const std::out_of_range&) {
+        break;  // walked past the last entry of the archive
+      }
+      idx++;
+    }
+    if (sizes.count(48) == 0) {
+      try {
+        findFavicon(*m_impl);  // throws when no (old format) favicon exists
+        sizes.insert(48);
+      } catch(EntryNotFound&) {}
+    }
+    return sizes;
+  }
+
+  bool Archive::hasIllustration(unsigned int size) const {
+    // Probe by fetching the item: EntryNotFound is the only outcome
+    // translated to "no illustration"; other exceptions propagate.
+    try {
+      getIllustrationItem(size);
+    } catch (EntryNotFound&) { return false; }
+    return true;
+  }
+
+  // Entry at position `idx`; std::out_of_range from getCountArticles() on.
+  Entry Archive::getEntryByPath(entry_index_type idx) const {
+    const auto entryCount = entry_index_type(m_impl->getCountArticles());
+    if (idx >= entryCount) throw std::out_of_range("entry index out of range");
+    return Entry(m_impl, idx);
+  }
+
+  // Resolve `path` to an entry, or throw EntryNotFound.
+  // New namespace scheme: tried in 'C' as is, then without its leading
+  // namespace (bookmark stored from an old archive). Old scheme: tried as a
+  // long path, then in 'A', 'I', 'J', '-' (bookmark from a recent archive).
+  Entry Archive::getEntryByPath(const std::string& path) const
+  {
+    if (!m_impl->hasNewNamespaceScheme()) {
+      // Path should contain the namespace.
+      auto found = m_impl->findx(path);
+      if (found.first) {
+        return Entry(m_impl, entry_index_type(found.second));
+      }
+      for (const char ns : {'A', 'I', 'J', '-'}) {
+        found = m_impl->findx(ns, path);
+        if (found.first) {
+          return Entry(m_impl, entry_index_type(found.second));
+        }
+      }
+      throw EntryNotFound("Cannot find entry");
+    }
+
+    // Get path in user content.
+    auto found = m_impl->findx('C', path);
+    if (found.first) {
+      return Entry(m_impl, entry_index_type(found.second));
+    }
+    try {
+      found = m_impl->findx('C', std::get<1>(parseLongPath(path)));
+      if (found.first) {
+        return Entry(m_impl, entry_index_type(found.second));
+      }
+    } catch (std::runtime_error&) {}
+    throw EntryNotFound("Cannot find entry");
+  }
+
+  // Entry found by translating `idx` with FileImpl::getIndexByTitle().
+  Entry Archive::getEntryByTitle(entry_index_type idx) const {
+    return Entry(m_impl, entry_index_type(m_impl->getIndexByTitle(title_index_t(idx))));
+  }
+
+  // First hit for `title` over 'C', 'A', 'I', 'J', '-'; else EntryNotFound.
+  Entry Archive::getEntryByTitle(const std::string& title) const {
+    for (const char ns : {'C', 'A', 'I', 'J', '-'}) {
+      log_trace("File::getArticleByTitle('" << ns << "', \"" << title << ')');
+      auto found = m_impl->findxByTitle(ns, title);
+      if (!found.first) continue;
+      return getEntryByTitle(entry_index_type(found.second));
+    }
+    throw EntryNotFound("Cannot find entry");
+  }
+
+  // Entry found by translating `idx` with FileImpl::getIndexByClusterOrder().
+  Entry Archive::getEntryByClusterOrder(entry_index_type idx) const {
+     return Entry(m_impl, entry_index_type(m_impl->getIndexByClusterOrder(entry_index_t(idx))));
+  }
+
+  // 'W' "mainPage" entry if found, else the header's main page, else EntryNotFound.
+  Entry Archive::getMainEntry() const {
+    auto wellKnown = m_impl->findx('W', "mainPage");
+    if (wellKnown.first) {
+      return getEntryByPath(entry_index_type(wellKnown.second));
+    }
+    auto& header = m_impl->getFileheader();
+    if (header.hasMainPage()) return getEntryByPath(header.getMainPage());
+    throw EntryNotFound("No main page");
+  }
+
+  // Only consults the file header, not the 'W' "mainPage" entry.
+  bool Archive::hasMainEntry() const {
+    return m_impl->getFileheader().hasMainPage();
+  }
+
+  // Random entry: drawn among the front entries (through the title lookup)
+  // with the new namespace scheme, inside namespace 'A' otherwise.
+  Entry Archive::getRandomEntry() const {
+    if (m_impl->hasNewNamespaceScheme()) {
+      const auto frontEntryCount = m_impl->getFrontEntryCount().v;
+      if (frontEntryCount == 0) {
+        throw EntryNotFound("Cannot find valid random entry (no front entry at all)");
+      }
+      return getEntryByTitle(randomNumber(frontEntryCount-1));
+    }
+    const auto startOfNamespaceA = m_impl->getNamespaceBeginOffset('A');
+    const auto endOfNamespaceA = m_impl->getNamespaceEndOffset('A');
+    const auto entryCountA = (endOfNamespaceA - startOfNamespaceA).v;
+    if (entryCountA == 0) {
+      throw EntryNotFound("Cannot find valid random entry (empty namespace 'A'");
+    }
+    return getEntryByPath(startOfNamespaceA.v + randomNumber(entryCountA-1));
+  }
+
+  // Looks for the index entry at 'X' "fulltext/xapian", then at 'Z'
+  // "/fulltextIndex/xapian"; answers the second member of the item's
+  // direct access information, converted to bool.
+  bool Archive::hasFulltextIndex() const {
+    auto found = m_impl->findx('X', "fulltext/xapian");
+    if (!found.first) {
+      found = m_impl->findx('Z', "/fulltextIndex/xapian");
+    }
+    if (!found.first) {
+      return false;
+    }
+    auto item = Entry(m_impl, entry_index_type(found.second)).getItem(true);
+    return item.getDirectAccessInformation().second;
+  }
+
+  // Same answer for the title index entry at 'X' "title/xapian".
+  bool Archive::hasTitleIndex() const {
+    auto found = m_impl->findx('X', "title/xapian");
+    if (!found.first) {
+      return false;
+    }
+    auto item = Entry(m_impl, entry_index_type(found.second)).getItem(true);
+    return item.getDirectAccessInformation().second;
+  }
+
+  // Range over the user entries, in path order.
+  Archive::EntryRange<EntryOrder::pathOrder> Archive::iterByPath() const {
+    return EntryRange<EntryOrder::pathOrder>(m_impl, m_impl->getStartUserEntry().v, m_impl->getEndUserEntry().v);
+  }
+
+  // Range in title order. Its bounds depend on what the archive offers:
+  // a front articles index, the old 'A' namespace, or only the user
+  // entries as a whole.
+  Archive::EntryRange<EntryOrder::titleOrder> Archive::iterByTitle() const
+  {
+    entry_index_type begin, end;
+
+    if (m_impl->hasFrontArticlesIndex()) {
+      // We have a front articles index. We can "simply" loop over all
+      // front entries.
+      begin = 0;
+      end = m_impl->getFrontEntryCount().v;
+    } else if (!m_impl->hasNewNamespaceScheme()) {
+      // We are an old zim archive with namespaces: we have to iterate on
+      // the 'A' namespace.
+      begin = m_impl->getNamespaceBeginOffset('A').v;
+      end = m_impl->getNamespaceEndOffset('A').v;
+    } else {
+      // We are a zim archive without namespace but without specific articles
+      // listing. We don't have the choice here, iterate on all user entries.
+      begin = m_impl->getStartUserEntry().v;
+      end = m_impl->getEndUserEntry().v;
+    }
+
+    return EntryRange<EntryOrder::titleOrder>(m_impl, begin, end);
+  }
+
+  // Range over [0, getEntryCount()) in the "efficient" order.
+  Archive::EntryRange<EntryOrder::efficientOrder> Archive::iterEfficient() const {
+    return EntryRange<EntryOrder::efficientOrder>(m_impl, 0, getEntryCount());
+  }
+
+  Archive::EntryRange<EntryOrder::pathOrder> Archive::findByPath(std::string path) const
+  {
+    // "url order" means that the entries are stored by long url ("NS/url").
+    //
+    // If we really want to search by url whatever is the namespace, we would have to
+    // search in all "content" (A, I, J, -) namespaces and then merge the results.
+    //
+    // It would be pretty complex as we would need to iterate over several ranges
+    // at the same time. Let's enforce that path is the full path and search in whatever
+    // namespace is in it.
+
+    // We have to return two iterators for a range of entries where `path` is a prefix.
+    // - The begin iterator is an iterator to the first entry with `path` as a prefix (or (range) end if none)
+    // - The end iterator is the iterator past the last entry with `path` as a prefix (or (global) end)
+    //
+    // findx returns an iterator for the exact match or the one just after.
+    // So, for the begin iterator, we can simply use the index returned by findx.
+    // For the end iterator we have to do the same but with a prefix "just after" the queried `path`.
+    // So the end index will always be just after the prefix range. If there is no prefix range, both
+    // begin and end will be just after where it would be.
+    //
+    // Supposing a list of titles:
+    // 0. aaaaaa
+    // 1. aaaaab
+    // 2. aabbaa
+    // 3. aabbbb
+    // 4. bbaaaa
+    // 5. bbbb
+    // 6. bbbbaa
+    // 7. bbbbbb
+    // 8. <past the end>
+
+    // If we search for prefix aabb, we must return 2/4
+    // A findx with aabb will return 2
+    // A findx with aabc will return 4
+    //
+    // If we search for prefix bbbb, we must return 5/8
+    // A findx with bbbb will return 5 (with exact match)
+    // A findx with bbbc will return 8
+    //
+    // If we search for prefix cccc, we must return 8/8
+    // A findx with cccc will return 8
+    // A findx with cccd will return 8
+    //
+    // If we search for prefix a, we must return 0/4
+    // A findx with a will return 0
+    // A findx with b will return 4
+    entry_index_t begin_idx, end_idx;
+    if (path.empty() || path == "/") {
+      begin_idx = m_impl->getStartUserEntry();  // no prefix: all the user entries
+      end_idx = m_impl->getEndUserEntry();
+    } else if (m_impl->hasNewNamespaceScheme()) {
+      begin_idx = m_impl->findx('C', path).second;
+      path.back()++;  // `path` becomes the prefix "just after" the queried one
+      end_idx = m_impl->findx('C', path).second;
+    } else {
+      char ns;
+      try {
+        std::tie(ns, path) = parseLongPath(path);  // `path` loses its namespace part
+      } catch (...) {
+        return Archive::EntryRange<EntryOrder::pathOrder>(m_impl, 0, 0);  // empty range
+      }
+      begin_idx = m_impl->findx(ns, path).second;
+      if (path.empty()) {
+        ns++;  // nothing to increment in the path: end at the next namespace
+      } else {
+        path.back()++;
+      }
+      end_idx = m_impl->findx(ns, path).second;
+    }
+    return Archive::EntryRange<EntryOrder::pathOrder>(m_impl, begin_idx.v, end_idx.v);
+  }
+
+  // Range, in title order, of the entries of the article namespace ('C' with
+  // the new namespace scheme, 'A' otherwise) whose title starts with `title`.
+  Archive::EntryRange<EntryOrder::titleOrder> Archive::findByTitle(std::string title) const
+  {
+    // "title order" means that the entries are stored by "NS/title" part.
+    // The find by title is only used for the articles, so we search only in
+    // their namespace instead of merging results from all "content" ones.
+    // See `Archive::findByPath` for the rationale of the two lookups.
+    auto ns = m_impl->hasNewNamespaceScheme() ? 'C' : 'A';
+    auto begin_idx = m_impl->findxByTitle(ns, title).second;
+    if (title.empty()) {
+      // back() is undefined on an empty string. Every title matches, so the
+      // range ends where the next namespace starts (as findByPath does).
+      ns++;
+    } else {
+      title.back()++;
+    }
+    auto end_idx = m_impl->findxByTitle(ns, title).second;
+    return Archive::EntryRange<EntryOrder::titleOrder>(m_impl, begin_idx.v, end_idx.v);
+  }
+
+  // Whether the file header declares a checksum.
+  bool Archive::hasChecksum() const {
+    return m_impl->getFileheader().hasChecksum();
+  }
+
+  // Checksum string as provided by the implementation.
+  std::string Archive::getChecksum() const {
+    return m_impl->getChecksum();
+  }
+
+  // Result of the implementation's verify().
+  bool Archive::check() const {
+    return m_impl->verify();
+  }
+
+  // Whether the implementation reports a multi-part archive.
+  bool Archive::isMultiPart() const {
+    return m_impl->is_multiPart();
+  }
+
+  // Whether the implementation reports the new namespace scheme.
+  bool Archive::hasNewNamespaceScheme() const {
+    return m_impl->hasNewNamespaceScheme();
+  }
+
+  // Cluster count converted to the public integer type.
+  cluster_index_type Archive::getClusterCount() const {
+    return cluster_index_type(m_impl->getCountClusters());
+  }
+
+  // Offset of cluster `idx` converted to the public integer type.
+  offset_type Archive::getClusterOffset(cluster_index_type idx) const {
+    return offset_type(m_impl->getClusterOffset(cluster_index_t(idx)));
+  }
+
+  // Main page index stored in the header (no check that one is set).
+  entry_index_type Archive::getMainEntryIndex() const {
+    return m_impl->getFileheader().getMainPage();
+  }
+
+  // Path order is the target order: the index is returned unchanged.
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::pathOrder>(const FileImpl& impl, entry_index_type idx)
+  {
+    return idx;
+  }
+
+  // Title order: translated with FileImpl::getIndexByTitle().
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::titleOrder>(const FileImpl& impl, entry_index_type idx)
+  {
+    return impl.getIndexByTitle(title_index_t(idx)).v;
+  }
+
+  // Efficient order: translated with FileImpl::getIndexByClusterOrder().
+  template<>
+  entry_index_type _toPathOrder<EntryOrder::efficientOrder>(const FileImpl& impl, entry_index_type idx)
+  {
+    return impl.getIndexByClusterOrder(entry_index_t(idx)).v;
+  }
+
+  // Runs one integrity check through the implementation.
+  bool Archive::checkIntegrity(IntegrityCheck checkType) {
+    return m_impl->checkIntegrity(checkType);
+  }
+
+  // Opens `zimPath` and runs every check whose bit is set in `checksToRun`,
+  // stopping at the first failure. A ZimFileFormatError is printed on
+  // std::cerr and counts as a failure.
+  bool validate(const std::string& zimPath, IntegrityCheckList checksToRun)
+  {
+    try {
+      Archive a(zimPath);
+      for (size_t i = 0; i < checksToRun.size(); ++i) {
+        if (!checksToRun.test(i)) continue;
+        if (!a.checkIntegrity(IntegrityCheck(i))) {
+          return false;
+        }
+      }
+    } catch (ZimFileFormatError& exception) {
+      std::cerr << exception.what() << std::endl;
+      return false;
+    }
+    return true;
+  }
+
+} // namespace zim
diff --git a/src/blob.cpp b/src/blob.cpp
new file mode 100644
index 0000000..39716c4
--- /dev/null
+++ b/src/blob.cpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+
+#include "zim/blob.h"
+#include "debug.h"
+#include "buffer.h"
+
+namespace zim {
+
+namespace
+{
+
+// Deleter that does nothing, for data the Blob does not own.
+struct NoDelete
+{
+  template<class T> void operator()(T*) {}
+};
+
+// This shared_ptr is used as a source object for the std::shared_ptr
+// aliasing constructor (with the purpose of avoiding the control block
+// allocation) for the case when the referred data must not be deleted.
+static Blob::DataPtr nonOwnedDataPtr((char*)nullptr, NoDelete());
+
+} // unnamed namespace
+
+// Empty blob: shares the non-owning null pointer and has size 0.
+Blob::Blob()
+  : _data(nonOwnedDataPtr), _size(0)
+{}
+
+// Blob over `size` bytes at `data` without taking ownership: the aliasing
+// constructor makes _data point to `data` while sharing the no-op deleter.
+Blob::Blob(const char* data, size_type size)
+  : _data(nonOwnedDataPtr, data), _size(size)
+{
+  // NOTE(review): presumably guards against data + size wrapping around.
+  ASSERT(size, <, SIZE_MAX);
+  ASSERT(data, <, (void*)(SIZE_MAX-size));
+}
+
+// Blob sharing ownership of `buffer`; `size` is stored as given, without
+// any check against the pointed data.
+Blob::Blob(const DataPtr& buffer, size_type size)
+  : _data(buffer), _size(size)
+{}
+
+} //zim
diff --git a/src/buffer.cpp b/src/buffer.cpp
new file mode 100644
index 0000000..6cc7896
--- /dev/null
+++ b/src/buffer.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "buffer.h"
+
+#include <sys/stat.h>
+#include <cstdio>
+#include <cstdlib>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <sstream>
+
+#ifndef _WIN32
+#  include <sys/mman.h>
+#  include <unistd.h>
+#endif
+
+namespace zim {
+
+namespace
+{
+// Deleter that does nothing, for data the Buffer does not own.
+struct NoDelete
+{
+  template<class T> void operator()(T*) {}
+};
+
+// This shared_ptr is used as a source object for the std::shared_ptr
+// aliasing constructor (with the purpose of avoiding the control block
+// allocation) for the case when the referred data must not be deleted.
+static Buffer::DataPtr nonOwnedDataPtr((char*)nullptr, NoDelete());
+
+} // unnamed namespace
+
+// Buffer of `size` bytes starting at `offset`, sharing ownership of the data.
+const Buffer Buffer::sub_buffer(offset_t offset, zsize_t size) const
+{
+  ASSERT(offset.v, <=, m_size.v);
+  ASSERT(offset.v+size.v, <=, m_size.v);
+  return Buffer(DataPtr(m_data, data(offset)), size);
+}
+
+// Buffer sharing ownership of `data`.
+const Buffer Buffer::makeBuffer(const DataPtr& data, zsize_t size) {
+  return Buffer(data, size);
+}
+
+// Buffer over memory that is never deleted by the buffer (no-op deleter).
+const Buffer Buffer::makeBuffer(const char* data, zsize_t size) {
+  return Buffer(DataPtr(nonOwnedDataPtr, data), size);
+}
+
+// Buffer owning a fresh char[size]; nothing is allocated for a size of 0.
+Buffer Buffer::makeBuffer(zsize_t size)
+{
+  if (size.v != 0) {
+    return Buffer(DataPtr(new char[size.v], std::default_delete<char[]>()), size);
+  }
+  return Buffer(DataPtr(nonOwnedDataPtr, nullptr), size);
+}
+
+Buffer::Buffer(const DataPtr& data, zsize_t size)
+  : m_size(size), m_data(data)
+{
+  ASSERT(m_size.v, <, SIZE_MAX);
+}
+
+// Pointer `offset` bytes into the data; `offset` may equal the size.
+const char* Buffer::data(offset_t offset) const {
+  ASSERT(offset.v, <=, m_size.v);
+  return m_data.get() + offset.v;
+}
+
+} //zim
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 0000000..b14e609
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_BUFFER_H_
+#define ZIM_BUFFER_H_
+
+#include <cstddef>
+#include <exception>
+#include <memory>
+#include <iostream>
+
+#include "config.h"
+#include "zim_types.h"
+#include "endian_tools.h"
+#include "debug.h"
+#include <zim/blob.h>
+
+namespace zim {
+
+class Buffer { // Sized, read-only view on bytes held through a std::shared_ptr.
+  public: // types
+    typedef std::shared_ptr<const char> DataPtr;
+
+  public: // functions
+    static const Buffer makeBuffer(const char* data, zsize_t size); // wraps `data`
+    static const Buffer makeBuffer(const DataPtr& data, zsize_t size); // shares `data`
+    static Buffer makeBuffer(zsize_t size); // allocates `size` bytes
+
+    const char* data(offset_t offset=offset_t(0)) const; // pointer to byte `offset`
+    // NOTE(review): data() also accepts offset == size(); at() must not be given it.
+    char at(offset_t offset) const {
+      return *(data(offset));
+    }
+    zsize_t size() const { return m_size; }
+    const Buffer sub_buffer(offset_t offset, zsize_t size) const;
+    operator Blob() const { return Blob(m_data, m_size.v); } // Blob built from the same shared pointer
+
+  private: // functions
+    Buffer(const DataPtr& data, zsize_t size);
+
+  private: // data
+    zsize_t m_size;
+    DataPtr m_data;
+};
+
+} // zim namespace
+
+#endif //ZIM_BUFFER_H_
diff --git a/src/buffer_reader.cpp b/src/buffer_reader.cpp
new file mode 100644
index 0000000..374d4da
--- /dev/null
+++ b/src/buffer_reader.cpp
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/zim.h>
+#include <zim/error.h>
+#include "buffer_reader.h"
+#include "buffer.h"
+
+#include <cstring>
+
+namespace zim {
+
+const Buffer BufferReader::get_buffer(offset_t offset, zsize_t size) const
+{ // The range is checked by Buffer::sub_buffer().
+  return source.sub_buffer(offset, size);
+}
+
+std::unique_ptr<const Reader> BufferReader::sub_reader(offset_t offset, zsize_t size) const
+{
+  // The new reader wraps a sub-buffer of `source`, so it refers to the same
+  // underlying bytes instead of copying them.
+  return std::unique_ptr<const Reader>(new BufferReader(get_buffer(offset, size)));
+}
+
+zsize_t BufferReader::size() const
+{ // Number of bytes in the wrapped buffer.
+  return source.size();
+}
+
+offset_t BufferReader::offset() const
+{ // Returns the memory address of the buffer's first byte, cast to an offset value.
+  return offset_t((offset_type)(static_cast<const void*>(source.data(offset_t(0)))));
+}
+
+
+void BufferReader::read(char* dest, offset_t offset, zsize_t size) const {
+  ASSERT(offset.v, <=, source.size().v);
+  ASSERT(size.v, <=, source.size().v - offset.v); // offset+size could wrap around
+  if (! size ) {
+    return; // nothing to copy
+  }
+  memcpy(dest, source.data(offset), size.v); // copies `size` bytes starting at `offset`
+}
+
+
+char BufferReader::read(offset_t offset) const {
+  // Single-byte read: the offset has to address an existing byte, hence the
+  // strict comparison.
+  ASSERT(offset.v, <, source.size().v);
+  return *source.data(offset);
+}
+
+
+} // zim
diff --git a/src/buffer_reader.h b/src/buffer_reader.h
new file mode 100644
index 0000000..938aecc
--- /dev/null
+++ b/src/buffer_reader.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_BUFFER_READER_H_
+#define ZIM_BUFFER_READER_H_
+
+#include "reader.h"
+
+namespace zim {
+
+class BufferReader : public Reader { // Reader serving bytes from an in-memory Buffer.
+  public:
+    BufferReader(const Buffer& source)
+      : source(source) {}
+    virtual ~BufferReader() {}
+
+    zsize_t size() const;
+    offset_t offset() const; // address of the first byte, see buffer_reader.cpp
+
+    void read(char* dest, offset_t offset, zsize_t size) const;
+    char read(offset_t offset) const;
+    const Buffer get_buffer(offset_t offset, zsize_t size) const;
+    std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
+
+  private:
+    const Buffer source; // the bytes this reader exposes
+};
+
+} // namespace zim
+
+#endif // ZIM_BUFFER_READER_H_
diff --git a/src/bufferstreamer.h b/src/bufferstreamer.h
new file mode 100644
index 0000000..ff447d9
--- /dev/null
+++ b/src/bufferstreamer.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_BUFFERSTREAMER_H
+#define ZIM_BUFFERSTREAMER_H
+
+#include "debug.h"
+
+#include <string.h>
+
+namespace zim
+{
+
+class BufferStreamer // Sequential reader over the bytes of a Buffer.
+{ // NOTE(review): Buffer, zsize_t and fromLittleEndian must come from the includer.
+public: // functions
+  BufferStreamer(const Buffer& buffer, zsize_t size)
+    : m_buffer(buffer),
+      m_current(buffer.data()),
+      m_size(size)
+  {}
+
+  explicit BufferStreamer(const Buffer& buffer)
+    : BufferStreamer(buffer, buffer.size())
+  {}
+
+  // Reads a value of the said type from the stream and advances past it.
+  //
+  // For best portability this function should be used with types of known
+  // bit-width (int32_t, uint16_t, etc) rather than builtin types with
+  // unknown bit-width (int, unsigned, etc).
+  template<typename T> T read()
+  {
+    const size_t N(sizeof(T));
+    char buf[N];
+    memcpy(buf, m_current, N); // NOTE(review): no check that N bytes are left
+    skip(zsize_t(N));
+    return fromLittleEndian<T>(buf); // XXX: This handles only integral types
+  }
+
+  const char* current() const { // next unread byte
+    return m_current;
+  }
+
+  zsize_t left() const { // bytes not consumed yet
+    return m_size;
+  }
+
+  void skip(zsize_t nbBytes) { // NOTE(review): nbBytes is not checked against left()
+    m_current += nbBytes.v;
+    m_size -= nbBytes;
+  }
+
+private: // data
+  const Buffer m_buffer; // copy of the buffer that m_current points into
+  const char* m_current;
+  zsize_t m_size;
+};
+
+} // namespace zim
+
+#endif // ZIM_BUFFERSTREAMER_H
diff --git a/src/cluster.cpp b/src/cluster.cpp
new file mode 100644
index 0000000..32afd8d
--- /dev/null
+++ b/src/cluster.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2016-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "cluster.h"
+#include <zim/blob.h>
+#include <zim/error.h>
+#include "buffer_reader.h"
+#include "endian_tools.h"
+#include "bufferstreamer.h"
+#include "decoderstreamreader.h"
+#include "rawstreamreader.h"
+#include <algorithm>
+#include <stdlib.h>
+#include <sstream>
+
+#include "compression.h"
+#include "log.h"
+
+#include "config.h"
+
+log_define("zim.cluster")
+
+#define log_debug1(e)
+
+namespace zim
+{
+
+namespace
+{
+
+std::unique_ptr<IStreamReader>
+getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression* comp, bool* extended)
+{ // Decodes the cluster info byte at `offset` and builds a reader for what follows it.
+  uint8_t clusterInfo = zimReader.read(offset);
+  // Very old zim files used 0 as a "default" compression, which means no compression.
+  uint8_t compInfo = clusterInfo & 0x0F; // low 4 bits: compression type
+  if(compInfo == 0) {
+    *comp = Cluster::Compression::None;
+  } else if (compInfo == int(Cluster::Compression::Zip)) {
+    throw std::runtime_error("zlib not enabled in this library");
+  } else if (compInfo == int(Cluster::Compression::Bzip2)) {
+    throw std::runtime_error("bzip2 not enabled in this library");
+  } else {
+    *comp = static_cast<Cluster::Compression>(compInfo); // validated by the switch below
+  }
+  *extended = clusterInfo & 0x10; // bit 4: extended cluster flag
+  auto subReader = std::shared_ptr<const Reader>(zimReader.sub_reader(offset+offset_t(1))); // data after the info byte
+
+  switch ( *comp ) {
+    case Cluster::Compression::None:
+      return std::unique_ptr<IStreamReader>(new RawStreamReader(subReader));
+    case Cluster::Compression::Lzma:
+      return std::unique_ptr<IStreamReader>(new DecoderStreamReader<LZMA_INFO>(subReader));
+    case Cluster::Compression::Zstd:
+      return std::unique_ptr<IStreamReader>(new DecoderStreamReader<ZSTD_INFO>(subReader));
+    default:
+      throw ZimFileFormatError("Invalid compression flag");
+  }
+}
+
+} // unnamed namespace
+
+  std::shared_ptr<Cluster> Cluster::read(const Reader& zimReader, offset_t clusterOffset)
+  { // Builds the cluster found at clusterOffset; the constructor reads its offset table.
+    Compression comp;
+    bool extended;
+    auto reader = getClusterReader(zimReader, clusterOffset, &comp, &extended);
+    return std::make_shared<Cluster>(std::move(reader), comp, extended);
+  }
+
+  Cluster::Cluster(std::unique_ptr<IStreamReader> reader_, Compression comp, bool isExtended)
+    : compression(comp),
+      isExtended(isExtended),
+      m_reader(std::move(reader_))
+  {
+    if (isExtended) { // extended clusters store 64-bit offsets
+      read_header<uint64_t>();
+    } else { // other clusters store 32-bit offsets
+      read_header<uint32_t>();
+    }
+  }
+
+  /* Reads the blob offset table found at the current reader position into m_blobOffsets. */
+  template<typename OFFSET_TYPE>
+  void Cluster::read_header()
+  {
+    // read first offset, which specifies, how many offsets we need to read
+    OFFSET_TYPE offset = m_reader->read<OFFSET_TYPE>();
+    size_t n_offset = offset / sizeof(OFFSET_TYPE);
+    // Corrupt table: going on would wrap `--n_offset` and `bufferSize` below.
+    if (n_offset == 0) throw ZimFileFormatError("Invalid cluster: empty blob offset table");
+
+    // read offsets
+    m_blobOffsets.clear();
+    m_blobOffsets.reserve(n_offset);
+    m_blobOffsets.push_back(offset_t(offset));
+
+    // Get the whole offsets data to avoid too many (system) calls.
+    auto bufferSize = zsize_t(offset-sizeof(OFFSET_TYPE));
+    auto buffer = m_reader->sub_reader(bufferSize)->get_buffer(offset_t(0), bufferSize);
+    auto seqReader = BufferStreamer(buffer, bufferSize);
+    while (--n_offset)
+    {
+      OFFSET_TYPE new_offset = seqReader.read<OFFSET_TYPE>();
+      ASSERT(new_offset, >=, offset); // offsets must never decrease
+
+      m_blobOffsets.push_back(offset_t(new_offset));
+      offset = new_offset;
+    }
+  }
+
+  zsize_t Cluster::getBlobSize(blob_index_t n) const
+  {
+      // Widen the index first: n+1 computed in the index type wraps for its largest
+      const size_t idx = blob_index_type(n); // value; the table always holds >= 1 entry.
+      if (idx >= m_blobOffsets.size() - 1) throw ZimFileFormatError("blob index out of range");
+      return zsize_t(m_blobOffsets[idx+1].v - m_blobOffsets[idx].v);
+  }
+
+  const Reader& Cluster::getReader(blob_index_t n) const
+  { // Readers are created on demand and in blob order, up to and including blob n.
+    std::lock_guard<std::mutex> lock(m_readerAccessMutex);
+    for(blob_index_type current(m_blobReaders.size()); current<=n.v; ++current) {
+      auto blobSize = getBlobSize(blob_index_t(current));
+      if (blobSize.v > SIZE_MAX) { // larger than SIZE_MAX: store an empty placeholder reader
+        m_blobReaders.push_back(std::unique_ptr<Reader>(new BufferReader(Buffer::makeBuffer(zsize_t(0)))));
+      } else { // presumably takes the next blobSize bytes of the stream — TODO confirm
+        m_blobReaders.push_back(m_reader->sub_reader(blobSize));
+      }
+    }
+    return *m_blobReaders[blob_index_type(n)];
+  }
+
+  Blob Cluster::getBlob(blob_index_t n) const
+  {
+    // A bad index or a blob larger than SIZE_MAX is answered with an empty blob.
+    if (!(n < count())) {
+      return Blob();
+    }
+    const auto blobSize = getBlobSize(n);
+    if (blobSize.v > SIZE_MAX) {
+      return Blob();
+    }
+    return getReader(n).get_buffer(offset_t(0), blobSize);
+  }
+
+  Blob Cluster::getBlob(blob_index_t n, offset_t offset, zsize_t size) const
+  {
+    // A bad index or an offset beyond the blob is answered with an empty blob.
+    if (!(n < count())) {
+      return Blob();
+    }
+    const auto blobSize = getBlobSize(n);
+    if ( offset.v > blobSize.v ) {
+      return Blob();
+    }
+    size = std::min(size, zsize_t(blobSize.v-offset.v)); // clamp to what is left after offset
+    if (size.v > SIZE_MAX) {
+      return Blob();
+    }
+    return getReader(n).get_buffer(offset, size);
+  }
+
+}
diff --git a/src/cluster.h b/src/cluster.h
new file mode 100644
index 0000000..e8c9662
--- /dev/null
+++ b/src/cluster.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2020 Miguel Rocha
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_CLUSTER_H
+#define ZIM_CLUSTER_H
+
+#include <zim/zim.h>
+#include "buffer.h"
+#include "zim_types.h"
+#include "file_reader.h"
+#include <iosfwd>
+#include <vector>
+#include <memory>
+#include <mutex>
+
+#include "zim_types.h"
+#include "zim/error.h"
+
+namespace zim
+{
+  class Blob;
+  class Reader;
+  class IStreamReader;
+
+  class Cluster : public std::enable_shared_from_this<Cluster> { // One cluster of blobs of a zim file.
+      typedef std::vector<offset_t> BlobOffsets;
+      typedef std::vector<std::unique_ptr<const Reader>> BlobReaders;
+
+    public:
+      // zim::Compression lists only compression methods supported by the
+      // writer. But on the reader side we need to deal with some historical
+      // compression types. Here we maintain the full list of compression
+      // types.
+      enum class Compression
+      {
+        None = 1,
+        Zip,      // Support is discontinued
+        Bzip2,    // Support is discontinued
+        Lzma,     // Supported only by the reader
+        Zstd
+      };
+
+    public:
+      const Compression compression;
+      const bool isExtended; // true: offsets are read as uint64_t, otherwise as uint32_t
+
+    private:
+      std::unique_ptr<IStreamReader> m_reader; // stream the offset table and blobs are read from
+
+      // offsets of the blob boundaries relative to the start of the cluster data
+      // (*after* the first byte (clusterInfo))
+      // For a cluster with N blobs, this collection contains N+1 entries.
+      // The start of the first blob and the end of the last blob are included.
+      BlobOffsets m_blobOffsets;
+
+      mutable std::mutex m_readerAccessMutex; // locked by getReader() around m_blobReaders
+      mutable BlobReaders m_blobReaders; // per-blob readers, filled on demand by getReader()
+
+      // Reads the offset table; OFFSET_TYPE is uint32_t or uint64_t (see the constructor).
+      template<typename OFFSET_TYPE>
+      void read_header();
+      const Reader& getReader(blob_index_t n) const;
+
+    public:
+      Cluster(std::unique_ptr<IStreamReader> reader, Compression comp, bool isExtended);
+      Compression getCompression() const   { return compression; }
+      bool isCompressed() const                { return compression != Compression::None; }
+
+      blob_index_t count() const               { return blob_index_t(m_blobOffsets.size() - 1); } // offsets minus one
+
+      zsize_t getBlobSize(blob_index_t n) const; // throws ZimFileFormatError on a bad index
+      // NOTE(review): getBlobOffset() does not range-check n.
+      offset_t getBlobOffset(blob_index_t n) const { return offset_t(1) + m_blobOffsets[blob_index_type(n)]; }
+      Blob getBlob(blob_index_t n) const; // empty Blob when n is out of range
+      Blob getBlob(blob_index_t n, offset_t offset, zsize_t size) const;
+
+      static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset);
+  };
+
+}
+
+#endif // ZIM_CLUSTER_H
diff --git a/src/compression.cpp b/src/compression.cpp
new file mode 100644
index 0000000..f145040
--- /dev/null
+++ b/src/compression.cpp
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Emmanuel Engelhart <kelson@kiwix.org>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "compression.h"
+
+#include "envvalue.h"
+
+#include <stdexcept>
+
+const std::string LZMA_INFO::name = "lzma";
+void LZMA_INFO::init_stream_decoder(stream_t* stream, char* raw_data)
+{ // Prepares `stream` for lzma decoding; raw_data is not used here.
+  *stream = LZMA_STREAM_INIT;
+  unsigned memsize = zim::envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024); // decoder memory limit
+  auto errcode = lzma_stream_decoder(stream, memsize, 0);
+  if (errcode != LZMA_OK) { // any failure is reported with the same message
+    throw std::runtime_error("Impossible to allocated needed memory to uncompress lzma stream");
+  }
+}
+
+CompStatus LZMA_INFO::stream_run_decode(stream_t* stream, CompStep step) { // plain forward
+  return stream_run(stream, step);
+}
+
+CompStatus LZMA_INFO::stream_run(stream_t* stream, CompStep step)
+{
+  // Runs one lzma_code() call and maps its status; unknown statuses throw.
+  const auto errcode = lzma_code(stream, step==CompStep::STEP?LZMA_RUN:LZMA_FINISH);
+  if (errcode == LZMA_OK) {
+    return CompStatus::OK;
+  }
+  if (errcode == LZMA_STREAM_END) {
+    return CompStatus::STREAM_END;
+  }
+  if (errcode == LZMA_BUF_ERROR) {
+    return CompStatus::BUF_ERROR;
+  }
+  std::ostringstream ss;
+  ss << "Unexpected lzma status : " << errcode;
+  throw std::runtime_error(ss.str());
+}
+
+void LZMA_INFO::stream_end_decode(stream_t* stream)
+{ // Hands the stream back to liblzma for cleanup.
+  lzma_end(stream);
+}
+
+
+const std::string ZSTD_INFO::name = "zstd";
+
+ZSTD_INFO::stream_t::stream_t() // starts empty: no buffers, no zstd stream yet
+: next_in(nullptr),
+  avail_in(0),
+  next_out(nullptr),
+  avail_out(0),
+  total_out(0),
+  encoder_stream(nullptr),
+  decoder_stream(nullptr)
+{}
+
+ZSTD_INFO::stream_t::~stream_t()
+{ // Frees whichever zstd stream was created by init_stream_encoder/decoder.
+  if ( encoder_stream )
+    ::ZSTD_freeCStream(encoder_stream);
+
+  if ( decoder_stream )
+    ::ZSTD_freeDStream(decoder_stream);
+}
+
+void ZSTD_INFO::init_stream_decoder(stream_t* stream, char* raw_data)
+{ // Creates and initializes the zstd decompression stream; raw_data is unused.
+  stream->decoder_stream = ::ZSTD_createDStream();
+  // ZSTD_createDStream() yields NULL when allocation fails: never init a null stream.
+  if (!stream->decoder_stream || ::ZSTD_isError(::ZSTD_initDStream(stream->decoder_stream))) {
+    throw std::runtime_error("Failed to initialize Zstd decompression");
+  }
+}
+
+void ZSTD_INFO::init_stream_encoder(stream_t* stream, char* raw_data)
+{ // Creates the zstd compression stream at level 19; raw_data is unused.
+  stream->encoder_stream = ::ZSTD_createCStream();
+  // ZSTD_createCStream() yields NULL when allocation fails: never init a null stream.
+  if (!stream->encoder_stream || ::ZSTD_isError(::ZSTD_initCStream(stream->encoder_stream, 19))) {
+    throw std::runtime_error("Failed to initialize Zstd compression");
+  }
+}
+
+CompStatus ZSTD_INFO::stream_run_encode(stream_t* stream, CompStep step) { // one zstd compression call
+  ::ZSTD_inBuffer inBuf; // pending input as zstd wants it described
+  inBuf.src = stream->next_in;
+  inBuf.size = stream->avail_in;
+  inBuf.pos = 0;
+
+  ::ZSTD_outBuffer outBuf; // free output space as zstd wants it described
+  outBuf.dst = stream->next_out;
+  outBuf.size = stream->avail_out;
+  outBuf.pos = 0;
+
+  auto ret = step == CompStep::STEP
+           ? ::ZSTD_compressStream(stream->encoder_stream, &outBuf, &inBuf)
+           : ::ZSTD_endStream(stream->encoder_stream, &outBuf);
+  stream->next_in += inBuf.pos; // record what was consumed and produced
+  stream->avail_in -= inBuf.pos;
+  stream->next_out += outBuf.pos;
+  stream->avail_out -= outBuf.pos;
+  stream->total_out += outBuf.pos;
+
+  if (::ZSTD_isError(ret)) {
+    throw std::runtime_error(::ZSTD_getErrorName(ret));
+  }
+
+  if ( step == CompStep::STEP ) {
+    if ( stream->avail_in != 0) { // input left over: the output buffer is expected to be full
+      ASSERT(stream->avail_out, ==, 0u);
+      return CompStatus::BUF_ERROR;
+    }
+  } else if ( ret > 0 ) { // ZSTD_endStream() reports data still to be flushed
+      return CompStatus::BUF_ERROR;
+  }
+
+  return CompStatus::OK;
+}
+
+CompStatus ZSTD_INFO::stream_run_decode(stream_t* stream, CompStep /*step*/) { // one zstd decompression call
+  ::ZSTD_inBuffer inBuf; // pending input as zstd wants it described
+  inBuf.src = stream->next_in;
+  inBuf.size = stream->avail_in;
+  inBuf.pos = 0;
+
+  ::ZSTD_outBuffer outBuf; // free output space as zstd wants it described
+  outBuf.dst = stream->next_out;
+  outBuf.size = stream->avail_out;
+  outBuf.pos = 0;
+
+  auto ret = ::ZSTD_decompressStream(stream->decoder_stream, &outBuf, &inBuf);
+  stream->next_in += inBuf.pos; // record what was consumed and produced
+  stream->avail_in -= inBuf.pos;
+  stream->next_out += outBuf.pos;
+  stream->avail_out -= outBuf.pos;
+  stream->total_out += outBuf.pos;
+
+  if (::ZSTD_isError(ret))
+    throw std::runtime_error(::ZSTD_getErrorName(ret));
+
+  if (ret == 0) // zstd returns 0 once a frame is completely decoded
+    return CompStatus::STREAM_END;
+  // This function never returns OK: anything short of the end is BUF_ERROR.
+  return CompStatus::BUF_ERROR;
+}
+
+void ZSTD_INFO::stream_end_decode(stream_t* stream)
+{ // Nothing to do: the decoder stream is freed by stream_t's destructor.
+}
+
+void ZSTD_INFO::stream_end_encode(stream_t* stream)
+{ // Nothing to do: the encoder stream is freed by stream_t's destructor.
+}
diff --git a/src/compression.h b/src/compression.h
new file mode 100644
index 0000000..c6e03e0
--- /dev/null
+++ b/src/compression.h
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Emmanuel Engelhart <kelson@kiwix.org>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef _LIBZIM_COMPRESSION_
+#define _LIBZIM_COMPRESSION_
+
+#include <vector>
+#include "string.h"
+
+#include "file_reader.h"
+#include <zim/error.h>
+
+#include "config.h"
+
+#include <lzma.h>
+#include <zstd.h>
+
+#include "zim_types.h"
+#include "constants.h"
+
+//#define DEB(X) std::cerr << __func__ << " " << X << std::endl ;
+#define DEB(X)
+
+enum class CompStep { // How a codec is asked to run.
+  STEP,  // keep processing (mapped to LZMA_RUN / ZSTD_compressStream)
+  FINISH // end of input (mapped to LZMA_FINISH / ZSTD_endStream)
+};
+
+enum class CompStatus { // Codec-independent result of one stream_run_* call.
+  OK,
+  STREAM_END, // the end of the compressed stream was reached
+  BUF_ERROR,  // the callers treat it as: input or output space is missing
+};
+
+enum class RunnerStatus { // Result of feed() on Compressor / Uncompressor.
+  OK,        // Uncompressor::feed: the end of the stream was reached
+  NEED_MORE, // the given input is used up; more can be fed
+  ERROR
+};
+
+struct LZMA_INFO { // lzma traits for the INFO template parameter; it declares decoding only.
+  typedef lzma_stream stream_t;
+  static const std::string name; // used in error messages
+  static void init_stream_decoder(stream_t* stream, char* raw_data);
+  static CompStatus stream_run_decode(stream_t* stream, CompStep step);
+  static CompStatus stream_run(stream_t* stream, CompStep step);
+  static void stream_end_decode(stream_t* stream);
+};
+
+
+struct ZSTD_INFO { // zstd traits for the INFO template parameter (encoding and decoding).
+  struct stream_t // has the fields Uncompressor / Compressor access on a stream
+  {
+    const unsigned char* next_in;
+    size_t avail_in;
+    unsigned char* next_out;
+    size_t avail_out;
+    size_t total_out;
+
+    ::ZSTD_CStream* encoder_stream; // null until init_stream_encoder()
+    ::ZSTD_DStream* decoder_stream; // null until init_stream_decoder()
+
+    stream_t();
+    ~stream_t();
+  private: // not copyable: the destructor frees the zstd streams
+    stream_t(const stream_t& t) = delete;
+    void operator=(const stream_t& t) = delete;
+  };
+
+  static const std::string name; // used in error messages
+  static void init_stream_decoder(stream_t* stream, char* raw_data);
+  static void init_stream_encoder(stream_t* stream, char* raw_data);
+  static CompStatus stream_run_encode(stream_t* stream, CompStep step);
+  static CompStatus stream_run_decode(stream_t* stream, CompStep step);
+  static void stream_end_encode(stream_t* stream);
+  static void stream_end_decode(stream_t* stream);
+};
+
+
+namespace zim {
+
+template<typename INFO>
+class Uncompressor // Decodes fed chunks into an output buffer that grows as needed.
+{
+  public:
+    Uncompressor(size_t initial_size) :
+      ret_data(new char[initial_size]),
+      data_size(initial_size)
+    {}
+    ~Uncompressor() = default;
+
+    void init(char* data) { // must be called before feed()
+      INFO::init_stream_decoder(&stream, data);
+      stream.next_out = (uint8_t*)ret_data.get();
+      stream.avail_out = data_size;
+    }
+
+    RunnerStatus feed(char* data, size_t size, CompStep step = CompStep::STEP) {
+      stream.next_in = (unsigned char*)data;
+      stream.avail_in = size;
+      while (true) {
+        auto errcode = INFO::stream_run_decode(&stream, step);
+        DEB((int)errcode)
+        switch (errcode) {
+          case CompStatus::BUF_ERROR:
+            if (stream.avail_in == 0 && stream.avail_out != 0)  {
+              // End of input stream.
+              // The decoder hasn't recognized the end of the input stream but
+              // there is no more input.
+              return RunnerStatus::NEED_MORE;
+            } else {
+              // Not enough output space.
+              // Allocate more memory and continue the loop.
+              DEB("need memory " << data_size << " " << stream.avail_out << " " << stream.total_out)
+              data_size *= 2;
+              std::unique_ptr<char[]> new_ret_data(new char[data_size]);
+              memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
+              stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
+              stream.avail_out = data_size - stream.total_out;
+              DEB(data_size << " " << stream.avail_out << " " << stream.avail_in)
+              ret_data = std::move(new_ret_data);
+            }
+            break;
+          case CompStatus::OK:
+            // On the first call where lzma cannot progress (no output space),
+            // lzma returns OK. If we returned NEED_MORE, we would try to decode
+            // new input data, but we should not as the current one is not processed.
+            // We must do a second step to get the BUF_ERROR and handle things correctly.
+            // If we have no more input, then we must ask for more.
+            if (stream.avail_in == 0) {
+              return RunnerStatus::NEED_MORE;
+            }
+            break;
+          case CompStatus::STREAM_END:
+            // End of compressed stream. Everything is ok.
+            return RunnerStatus::OK;
+          default:
+            // unreachable
+            return RunnerStatus::ERROR;
+        }
+      };
+      // unreachable
+      return RunnerStatus::NEED_MORE;
+    }
+
+    std::unique_ptr<char[]> get_data(zim::zsize_t* size) {
+      feed(nullptr, 0, CompStep::FINISH); // the returned status is ignored
+      size->v = stream.total_out;
+      INFO::stream_end_decode(&stream);
+      return std::move(ret_data); // the object keeps no output buffer after this
+    }
+
+  private:
+    std::unique_ptr<char[]> ret_data; // decoded output so far
+    size_type data_size; // capacity of ret_data
+    typename INFO::stream_t stream;
+};
+
+#define CHUNCK_SIZE ((zim::size_type)(1024))
+/**
+ * Uncompress data of the reader at startOffset.
+ * @param reader         The reader where the data is.
+ * @param startOffset    The offset where the data is in the reader.
+ * @param[out] dest_size The size of the uncompressed data.
+ * @return The uncompressed data, owned by the returned unique_ptr.
+ * @throw zim::ZimFileFormatError if the stream is invalid or ends too early.
+*/
+template<typename INFO>
+std::unique_ptr<char[]> uncompress(const zim::Reader* reader, zim::offset_t startOffset, zim::zsize_t* dest_size) {
+  // Use an Uncompressor to decompress the data.
+  // As we know neither the result size nor the compressed size,
+  // we have to go chunk by chunk until the decompressor is happy.
+  // Let's assume it will be something like the default clusterSize used at creation
+  Uncompressor<INFO> runner(DEFAULT_CLUSTER_SIZE);
+  // The input is a buffer of CHUNCK_SIZE char max. It may be less if the last chunk
+  // is at the end of the reader and the reader size is not a multiple of CHUNCK_SIZE.
+  std::vector<char> raw_data(CHUNCK_SIZE);
+
+  DEB("Init")
+  runner.init(raw_data.data());
+
+  zim::size_type availableSize = reader->size().v - startOffset.v;
+  auto ret = RunnerStatus::NEED_MORE;
+  while(ret != RunnerStatus::OK) {
+    if (ret == RunnerStatus::NEED_MORE and availableSize) {
+      zim::size_type inputSize = std::min(availableSize, CHUNCK_SIZE);
+      reader->read(raw_data.data(), startOffset, zim::zsize_t(inputSize));
+      startOffset.v += inputSize;
+      availableSize -= inputSize;
+      DEB("Step " << startOffset.v)
+      ret = runner.feed(raw_data.data(), inputSize);
+      DEB("Ret " << (int)ret)
+    }
+    // Running out of input before the stream ends is an error too: never loop forever.
+    if (ret == RunnerStatus::ERROR or (ret == RunnerStatus::NEED_MORE and !availableSize)) {
+      throw zim::ZimFileFormatError(std::string("Invalid ") + INFO::name + std::string(" stream for cluster."));
+    }
+  }
+
+  DEB("Finish")
+  return runner.get_data(dest_size);
+}
+
+template<typename INFO>
+class Compressor // Encodes fed data into an output buffer that grows as needed.
+{
+  public:
+    Compressor(size_t initial_size=1024*1024) :
+      ret_data(new char[initial_size]),
+      ret_size(initial_size)
+    {}
+
+    ~Compressor() = default;
+
+    void init(char* data) { // must be called before feed()
+      INFO::init_stream_encoder(&stream, data);
+      stream.next_out = (uint8_t*)ret_data.get();
+      stream.avail_out = ret_size;
+    }
+
+    RunnerStatus feed(const char* data, size_t size, CompStep step=CompStep::STEP) {
+      stream.next_in = (unsigned char*)data;
+      stream.avail_in = size;
+      while (true) {
+        auto errcode = INFO::stream_run_encode(&stream, step);
+        switch (errcode) {
+          case CompStatus::OK:
+            if (stream.avail_out == 0) {
+              // lzma returns OK the first time it runs out of output memory.
+              // BUF_ERROR is returned only on the second call to lzma_code.
+              continue;
+            } else {
+              return RunnerStatus::NEED_MORE;
+            }
+          case CompStatus::STREAM_END:
+            return RunnerStatus::NEED_MORE;
+          case CompStatus::BUF_ERROR:
+            if (stream.avail_out == 0) {
+              // Not enough output space: double the buffer and retry
+              ret_size *= 2;
+              std::unique_ptr<char[]> new_ret_data(new char[ret_size]);
+              memcpy(new_ret_data.get(), ret_data.get(), stream.total_out);
+              stream.next_out = (unsigned char*)(new_ret_data.get() + stream.total_out);
+              stream.avail_out = ret_size - stream.total_out;
+              ret_data = std::move(new_ret_data);
+              continue;
+            } else {
+              return RunnerStatus::ERROR;
+            }
+          break;
+          default:
+            // unreachable
+            return RunnerStatus::ERROR;
+        };
+      };
+      // unreachable
+      return RunnerStatus::NEED_MORE;
+    }
+
+    std::unique_ptr<char[]> get_data(zim::zsize_t* size) {
+      feed(nullptr, 0, CompStep::FINISH); // the returned status is ignored
+      INFO::stream_end_encode(&stream);
+      size->v = stream.total_out;
+      return std::move(ret_data); // the object keeps no output buffer after this
+    }
+
+  private:
+    std::unique_ptr<char[]> ret_data; // encoded output so far
+    size_t ret_size; // capacity of ret_data
+    typename INFO::stream_t stream;
+};
+
+} // namespace zim
+
+#endif // _LIBZIM_COMPRESSION_
diff --git a/src/concurrent_cache.h b/src/concurrent_cache.h
new file mode 100644
index 0000000..0533d65
--- /dev/null
+++ b/src/concurrent_cache.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_CONCURRENT_CACHE_H
+#define ZIM_CONCURRENT_CACHE_H
+
+#include "lrucache.h"
+
+#include <future>
+#include <mutex>
+
+namespace zim
+{
+
+/**
+   ConcurrentCache implements a concurrent thread-safe cache
+
+   Compared to zim::lru_cache, each access operation is slightly more expensive.
+   However, different slots of the cache can be safely accessed concurrently
+   with minimal blocking. Concurrent access to the same element is also
+   safe, and, in case of a cache miss, will block until that element becomes
+   available.
+ */
+template <typename Key, typename Value>
+class ConcurrentCache
+{
+private: // types
+  typedef std::shared_future<Value> ValuePlaceholder;
+  typedef lru_cache<Key, ValuePlaceholder> Impl;
+
+public: // functions
+  explicit ConcurrentCache(size_t maxEntries) : impl_(maxEntries) {}
+
+  // Gets the entry corresponding to the given key. If the entry is not in the
+  // cache, it is obtained by calling f() (without any arguments) and the
+  // result is put into the cache.
+  //
+  // The cache as a whole is locked only for the duration of accessing
+  // the respective slot. If, in the case of a cache miss, the generation
+  // of the missing element takes a long time, only attempts to access that
+  // element will block - the rest of the cache remains open to concurrent
+  // access.
+  // If f() throws (any type), the entry is dropped and the exception is rethrown.
+  template<class F>
+  Value getOrPut(const Key& key, F f)
+  {
+    std::promise<Value> valuePromise;
+    std::unique_lock<std::mutex> l(lock_);
+    const auto x = impl_.getOrPut(key, valuePromise.get_future().share());
+    l.unlock();
+    if ( x.miss() ) {
+      try {
+        valuePromise.set_value(f());
+      } catch (...) {
+        // Whatever was thrown, never keep a placeholder that cannot be fulfilled.
+        drop(key);
+        throw;
+      }
+    }
+    return x.value().get();
+  }
+
+  // Removes the entry of the given key; returns what lru_cache::drop() returns.
+  bool drop(const Key& key)
+  {
+    std::unique_lock<std::mutex> l(lock_);
+    return impl_.drop(key);
+  }
+
+private: // data
+  Impl impl_;
+  std::mutex lock_;
+};
+
+} // namespace zim
+
+#endif // ZIM_CONCURRENT_CACHE_H
+
diff --git a/src/config.h.in b/src/config.h.in
new file mode 100644
index 0000000..77991c3
--- /dev/null
+++ b/src/config.h.in
@@ -0,0 +1,22 @@
+
+#mesondefine VERSION
+
+#mesondefine DIRENT_CACHE_SIZE
+
+#mesondefine DIRENT_LOOKUP_CACHE_SIZE
+
+#mesondefine CLUSTER_CACHE_SIZE
+
+#mesondefine LZMA_MEMORY_SIZE
+
+#mesondefine ENABLE_XAPIAN
+
+#mesondefine ENABLE_USE_MMAP
+
+#mesondefine ENABLE_USE_BUFFER_HEADER
+
+#mesondefine MMAP_SUPPORT_64
+
+#mesondefine ENV64BIT
+
+#mesondefine ENV32BIT
diff --git a/src/constants.h b/src/constants.h
new file mode 100644
index 0000000..2ed146c
--- /dev/null
+++ b/src/constants.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ANCHOR_TERM "0posanchor "
+
+#define DEFAULT_CLUSTER_SIZE (2*1024*1024)  // 2 MiB; parenthesized so the macro expands safely inside larger expressions
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000..5bb96b8
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef DEBUG_H_
+#define DEBUG_H_
+
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <stdlib.h>
+
+#if defined (NDEBUG)
+# define ASSERT(left, operator, right) (void(0))
+#else
+
+#if !defined(_WIN32) && !defined(__APPLE__) && !defined(__ANDROID__) && !defined(__EMSCRIPTEN__) && defined(__GNU_LIBRARY__)
+#include <execinfo.h>
+#endif
+
+// Reports a failed ASSERT on stderr (with a backtrace where available), then throws std::runtime_error.
+template<typename T, typename U>
+void _on_assert_fail(const char* vara, const char* op, const char* varb,
+                     T a, U b, const char* file, int line)  {
+  std::ostringstream ss;
+  ss << "\nAssertion failed at "<< file << ":" << line << "\n " <<
+      vara << "[" << a << "] " << op << " " << varb << "[" << b << "]";
+  std::cerr << ss.str() << std::endl;
+
+#if !defined(_WIN32) && !defined(__APPLE__) && !defined(__ANDROID__) && !defined(__EMSCRIPTEN__) && defined(__GNU_LIBRARY__)
+  void *callstack[64];
+  const int size = backtrace(callstack, 64);
+  char** strings = backtrace_symbols(callstack, size);  // NULL if the symbol table cannot be allocated
+  for (int i=0; strings != nullptr && i<size; i++) {
+    std::cerr << strings[i] << std::endl;
+  }
+  free(strings);
+#endif
+  throw std::runtime_error(ss.str());
+}
+
+# define ASSERT(left, operator, right) do { auto _left = left; auto _right = right; if (!((_left) operator (_right))) _on_assert_fail(#left, #operator, #right, _left, _right, __FILE__, __LINE__);  } while(0) // each operand is evaluated once; on failure _on_assert_fail() logs and throws
+
+#endif
+
+#endif
diff --git a/src/decoderstreamreader.h b/src/decoderstreamreader.h
new file mode 100644
index 0000000..d48582b
--- /dev/null
+++ b/src/decoderstreamreader.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_DECODERSTREAMREADER_H
+#define ZIM_DECODERSTREAMREADER_H
+
+#include "compression.h"
+#include "istreamreader.h"
+
+namespace zim
+{
+
+template<typename Decoder>
+class DecoderStreamReader : public IStreamReader
+{
+private: // constants
+  enum { CHUNK_SIZE = 1024 };  // maximum number of encoded bytes handed to the decoder at once
+
+public: // functions
+  DecoderStreamReader(std::shared_ptr<const Reader> inputReader)
+    : m_encodedDataReader(inputReader),
+      m_currentInputOffset(0),
+      m_inputBytesLeft(inputReader->size()),
+      m_encodedDataChunk(Buffer::makeBuffer(zsize_t(CHUNK_SIZE)))
+  {
+    Decoder::init_stream_decoder(&m_decoderState, nullptr);
+    readNextChunk();  // preload the first chunk of encoded data
+  }
+
+  ~DecoderStreamReader()
+  {
+    Decoder::stream_end_decode(&m_decoderState);
+  }
+
+private: // functions
+  void readNextChunk()  // points the decoder input at the next part (at most CHUNK_SIZE bytes) of the encoded data
+  {
+    const auto n = std::min(zsize_t(CHUNK_SIZE), m_inputBytesLeft);
+    m_encodedDataChunk = m_encodedDataReader->get_buffer(m_currentInputOffset, n);
+    m_currentInputOffset += n;
+    m_inputBytesLeft -= n;
+    // XXX: ugly C-style cast (casting away constness) on the next line
+    m_decoderState.next_in  = (unsigned char*)m_encodedDataChunk.data();
+    m_decoderState.avail_in = m_encodedDataChunk.size().v;
+  }
+
+  CompStatus decodeMoreBytes()  // runs one decoding step, refilling the input first when it is exhausted
+  {
+    CompStep step = CompStep::STEP;
+    if ( m_decoderState.avail_in == 0 )
+    {
+      if ( m_inputBytesLeft.v == 0 )
+        step = CompStep::FINISH;  // no encoded input left at all
+      else
+        readNextChunk();
+    }
+
+    return Decoder::stream_run_decode(&m_decoderState, step);
+  }
+
+  void readImpl(char* buf, zsize_t nbytes) override  // decodes until nbytes bytes have been written to buf
+  {
+    m_decoderState.next_out = (unsigned char*)buf;
+    m_decoderState.avail_out = nbytes.v;
+    while ( m_decoderState.avail_out != 0 )  // NOTE(review): never exits if the decoder stops producing output without throwing - confirm
+    {
+      // We don't care about the return code of decodeMoreBytes.
+      // We feed (or stop feeding) the decoder based on what
+      // we need to decode and the `avail_in`.
+      // If there is an error somehow, an exception will be thrown.
+      decodeMoreBytes();
+    }
+  }
+
+private: // types
+  typedef typename Decoder::stream_t DecoderState;
+
+private: // data
+  std::shared_ptr<const Reader> m_encodedDataReader;  // source of the encoded bytes
+  offset_t m_currentInputOffset;  // offset in m_encodedDataReader of the next chunk to read
+  zsize_t m_inputBytesLeft; // count of bytes left in the input stream
+  DecoderState m_decoderState;
+  Buffer m_encodedDataChunk;  // chunk currently referenced by m_decoderState.next_in
+};
+
+} // namespace zim
+
+#endif // ZIM_DECODERSTREAMREADER_H
diff --git a/src/dirent.cpp b/src/dirent.cpp
new file mode 100644
index 0000000..9c26212
--- /dev/null
+++ b/src/dirent.cpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "_dirent.h"
+#include "direntreader.h"
+#include <zim/zim.h>
+#include <zim/error.h>
+#include "buffer.h"
+#include "bufferstreamer.h"
+#include "endian_tools.h"
+#include "log.h"
+#include <algorithm>
+#include <cstring>
+
+log_define("zim.dirent")
+
+namespace zim
+{
+  //////////////////////////////////////////////////////////////////////
+  // Dirent
+  //
+  // Out-of-class definitions of the special mime type constants (no initializer here: the values come from the class declaration).
+  const uint16_t Dirent::redirectMimeType;
+  const uint16_t Dirent::linktargetMimeType;
+  const uint16_t Dirent::deletedMimeType;
+
+  // Parses the dirent stored at the beginning of direntData into dirent.
+  // Returns false when direntData is too short to contain the url, the title
+  // and the parameter of the dirent (the caller must retry with more data).
+  bool DirentReader::initDirent(Dirent& dirent, const Buffer& direntData) const
+  {
+    BufferStreamer reader(direntData);
+    uint16_t mimeType = reader.read<uint16_t>();
+    bool redirect = (mimeType == Dirent::redirectMimeType);
+    bool linktarget = (mimeType == Dirent::linktargetMimeType);
+    bool deleted = (mimeType == Dirent::deletedMimeType);
+    uint8_t extraLen = reader.read<uint8_t>();
+    char ns = reader.read<char>();
+    uint32_t version = reader.read<uint32_t>();
+    dirent.setVersion(version);
+
+    if (redirect)
+    {
+      entry_index_type redirectIndex(reader.read<entry_index_type>());
+      log_debug("redirectIndex=" << redirectIndex);
+      dirent.setRedirect(entry_index_t(redirectIndex));
+    }
+    else if (linktarget || deleted)
+    {
+      log_debug("linktarget or deleted entry");
+      dirent.setItem(mimeType, cluster_index_t(0), blob_index_t(0));
+    }
+    else
+    {
+      log_debug("read article entry");
+      uint32_t clusterNumber = reader.read<uint32_t>();
+      uint32_t blobNumber = reader.read<uint32_t>();
+
+      log_debug("mimeType=" << mimeType << " clusterNumber=" << clusterNumber << " blobNumber=" << blobNumber);
+
+      dirent.setItem(mimeType, cluster_index_t(clusterNumber), blob_index_t(blobNumber));
+    }
+
+    std::string url;
+    std::string title;
+    std::string parameter;
+
+    log_debug("read url, title and parameters");
+
+    // The last extraLen bytes of the dirent belong to the parameter, so a
+    // string may only use the bytes before them. That limit must not wrap
+    // around (left() < extraLen), and a string filling the whole limit has no
+    // terminator inside the buffer: in both cases more data is needed.
+    auto readString = [&reader, extraLen](std::string& out) -> bool {
+      if (reader.left().v <= extraLen) {
+        return false;
+      }
+      const size_type maxSize = reader.left().v - extraLen;
+      const size_type size = strnlen(reader.current(), maxSize);
+      if (size >= maxSize) {
+        return false;
+      }
+      out.assign(reader.current(), size);
+      reader.skip(zsize_t(size+1));
+      return true;
+    };
+    if (!readString(url) || !readString(title)) {
+      return false;
+    }
+    if (extraLen > reader.left().v) {
+      return false;
+    }
+    parameter = std::string(reader.current(), extraLen);
+    dirent.setUrl(ns, url);
+    dirent.setTitle(title);
+    dirent.setParameter(parameter);
+    return true;
+  }
+
+  std::shared_ptr<const Dirent> DirentReader::readDirent(offset_t offset)
+  {
+    const auto totalSize = mp_zimReader->size();
+    if (offset.v >= totalSize.v) {
+      throw ZimFileFormatError("Invalid dirent pointer");
+    }
+
+    // The size of a dirent is not known in advance: it depends on the length
+    // of its url, title and extra parameters. Reading the whole file would be
+    // inefficient, so parse a 256 bytes window and grow it by 256 bytes as
+    // long as initDirent() reports that the dirent is truncated. Most dirents
+    // are "Article" entries (16 bytes header) that fit in the first window.
+    const size_type maxSize = totalSize.v - offset.v; // never read past the end
+    size_type bufferSize = std::min(size_type(256), maxSize);
+    auto dirent = std::make_shared<Dirent>();
+    std::lock_guard<std::mutex> lock(m_bufferMutex);
+    for ( ; ; bufferSize = std::min(bufferSize + 256, maxSize) ) {
+      // resize() (not reserve()) so that the bytes written below really exist.
+      m_buffer.resize(bufferSize);
+      mp_zimReader->read(m_buffer.data(), offset, zsize_t(bufferSize));
+      if ( initDirent(*dirent, Buffer::makeBuffer(m_buffer.data(), zsize_t(bufferSize))) )
+        return dirent;
+      if ( bufferSize == maxSize )
+        throw ZimFileFormatError("Invalid dirent (truncated)");
+    }
+  }
+
+  std::string Dirent::getLongUrl() const
+  {
+    log_trace("Dirent::getLongUrl()");
+    log_debug("namespace=" << getNamespace() << " title=" << getTitle());
+    const std::string namespacePart(1, getNamespace());  // "<namespace>/<url>"
+    return namespacePart + '/' + getUrl();
+  }
+
+}
diff --git a/src/dirent_accessor.cpp b/src/dirent_accessor.cpp
new file mode 100644
index 0000000..73a8f42
--- /dev/null
+++ b/src/dirent_accessor.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "dirent_accessor.h"
+
+#include "direntreader.h"
+#include "_dirent.h"
+#include "envvalue.h"
+
+#include <mutex>
+
+#include <zim/error.h>
+
+using namespace zim;
+
+DirectDirentAccessor::DirectDirentAccessor(std::shared_ptr<DirentReader> direntReader, std::unique_ptr<const Reader> urlPtrReader, entry_index_t direntCount)
+  : mp_direntReader(direntReader),
+    mp_urlPtrReader(std::move(urlPtrReader)),  // takes ownership of the url pointer list reader
+    m_direntCount(direntCount),
+    m_direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)),  // presumably ZIM_DIRENTCACHE overrides the default size - confirm in envvalue.h
+    m_bufferDirentZone(256)  // NOTE(review): not used by any method of dirent_accessor.cpp - confirm it is still needed
+{}
+
+std::shared_ptr<const Dirent> DirectDirentAccessor::getDirent(entry_index_t idx) const
+{
+  // Fast path: serve the dirent from the cache (lock released before reading).
+  {
+    std::lock_guard<std::mutex> cacheGuard(m_direntCacheLock);
+    auto cached = m_direntCache.get(idx.v);
+    if (cached.hit()) {
+      return cached.value();
+    }
+  }
+
+  // Slow path: read the dirent without holding the lock, then cache it.
+  auto dirent = readDirent(getOffset(idx));
+  std::lock_guard<std::mutex> cacheGuard(m_direntCacheLock);
+  m_direntCache.put(idx.v, dirent);
+  return dirent;
+}
+
+offset_t DirectDirentAccessor::getOffset(entry_index_t idx) const
+{
+  if (idx >= m_direntCount) {
+    throw std::out_of_range("entry index out of range");
+  }
+  const offset_t ptrPosition(sizeof(offset_type)*idx.v);  // one offset_type per entry
+  return offset_t(mp_urlPtrReader->read_uint<offset_type>(ptrPosition));
+}
+
+std::shared_ptr<const Dirent> DirectDirentAccessor::readDirent(offset_t offset) const
+{
+  return mp_direntReader->readDirent(offset);  // plain delegation to the DirentReader, no caching here
+}
+
+
+IndirectDirentAccessor::IndirectDirentAccessor(std::shared_ptr<const DirectDirentAccessor> direntAccessor, std::unique_ptr<const Reader> indexReader, title_index_t direntCount)
+  : mp_direntAccessor(direntAccessor),  // shared with the caller
+    mp_indexReader(std::move(indexReader)),  // takes ownership of the index list reader
+    m_direntCount(direntCount)
+{}
+
+entry_index_t IndirectDirentAccessor::getDirectIndex(title_index_t idx) const
+{
+  if (idx >= m_direntCount) {
+    throw std::out_of_range("entry index out of range");
+  }
+  // The list stores raw entry_index_type values: use the size of that type (not of the entry_index_t wrapper) as stride.
+  return entry_index_t(mp_indexReader->read_uint<entry_index_type>(offset_t(sizeof(entry_index_type)*idx.v)));
+}
+
+std::shared_ptr<const Dirent> IndirectDirentAccessor::getDirent(title_index_t idx) const
+{
+  // Translate idx through the index list, then delegate to the direct accessor.
+  return mp_direntAccessor->getDirent(getDirectIndex(idx));
+}
diff --git a/src/dirent_accessor.h b/src/dirent_accessor.h
new file mode 100644
index 0000000..501e9b6
--- /dev/null
+++ b/src/dirent_accessor.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_DIRENT_ACCESSOR_H
+#define ZIM_DIRENT_ACCESSOR_H
+
+#include "zim_types.h"
+#include "debug.h"
+#include "lrucache.h"
+
+#include <memory>
+#include <mutex>
+#include <vector>
+
+namespace zim
+{
+
+class Dirent;
+class Reader;
+class DirentReader;
+
+/**
+ * DirectDirentAccessor is used to access a dirent from its index.
+ * It doesn't provide any "advanced" features like lookup or find.
+ *
+ * This is the base class to locate a dirent (offset) and read it.
+ *
+ */
+
+class DirectDirentAccessor
+{
+public: // functions
+  DirectDirentAccessor(std::shared_ptr<DirentReader> direntReader, std::unique_ptr<const Reader> urlPtrReader, entry_index_t direntCount);
+
+  offset_t    getOffset(entry_index_t idx) const;  // offset of the dirent idx, read from the url pointer list
+  std::shared_ptr<const Dirent> getDirent(entry_index_t idx) const;  // dirent idx, served from the cache when possible
+  entry_index_t getDirentCount() const  {  return m_direntCount; }
+
+private: // functions
+  std::shared_ptr<const Dirent> readDirent(offset_t) const;  // reads (without caching) the dirent at the given offset
+
+private: // data
+  std::shared_ptr<DirentReader>  mp_direntReader;
+  std::unique_ptr<const Reader>  mp_urlPtrReader;  // reader of the url pointer list
+  entry_index_t                  m_direntCount;
+
+  mutable lru_cache<entry_index_type, std::shared_ptr<const Dirent>> m_direntCache;
+  mutable std::mutex m_direntCacheLock;  // protects m_direntCache
+
+  mutable std::vector<char>  m_bufferDirentZone;  // NOTE(review): looks unused besides its initialization - confirm
+  mutable std::mutex         m_bufferDirentLock;  // NOTE(review): looks unused - confirm
+};
+
+class IndirectDirentAccessor
+{
+  public:
+    IndirectDirentAccessor(std::shared_ptr<const DirectDirentAccessor>, std::unique_ptr<const Reader> indexReader, title_index_t direntCount);
+
+    entry_index_t getDirectIndex(title_index_t idx) const;  // entry index stored at position idx of the index list
+    std::shared_ptr<const Dirent> getDirent(title_index_t idx) const;  // dirent found through getDirectIndex(idx)
+    title_index_t getDirentCount() const { return m_direntCount; }
+
+  private: // data
+    std::shared_ptr<const DirectDirentAccessor> mp_direntAccessor;  // used to read the dirents themselves
+    std::unique_ptr<const Reader>               mp_indexReader;     // reader of the list of entry indexes
+    title_index_t                               m_direntCount;
+};
+
+} // namespace zim
+
+#endif // ZIM_DIRENT_ACCESSOR_H
diff --git a/src/dirent_lookup.h b/src/dirent_lookup.h
new file mode 100644
index 0000000..5daefa4
--- /dev/null
+++ b/src/dirent_lookup.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_DIRENT_LOOKUP_H
+#define ZIM_DIRENT_LOOKUP_H
+
+#include "zim_types.h"
+#include "debug.h"
+#include "narrowdown.h"
+
+#include <algorithm>
+#include <map>
+#include <mutex>
+#include <vector>
+#include <cassert>
+
+namespace zim
+{
+
+template<class TConfig>
+class DirentLookup
+{
+public: // types
+  typedef typename TConfig::DirentAccessorType DirentAccessor;
+  typedef typename TConfig::index_t index_t;
+  typedef std::pair<bool, index_t> Result;  // first: whether the key was found; second: index where the search ended
+
+public: // functions
+  explicit DirentLookup(const DirentAccessor* _direntAccessor);  // the accessor is kept by reference and must outlive this object
+
+  index_t getNamespaceRangeBegin(char ns) const;  // index of the first dirent whose namespace is >= ns (cached)
+  index_t getNamespaceRangeEnd(char ns) const;    // same as getNamespaceRangeBegin(ns+1)
+
+  Result find(char ns, const std::string& key) const;  // looks for ns/key among all the dirents
+
+protected: // functions
+  int compareWithDirentAt(char ns, const std::string& key, entry_index_type i) const;  // <0, 0 or >0: ns/key versus the dirent i
+  Result findInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const;  // search in [l, u)
+  Result binarySearchInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const;  // search in [l, u], u included
+
+protected: // types
+  typedef std::map<char, index_t> NamespaceBoundaryCache;
+
+protected: // data
+  const DirentAccessor& direntAccessor;
+  const entry_index_type direntCount;  // read from the accessor at construction
+
+  mutable NamespaceBoundaryCache namespaceBoundaryCache;  // results of getNamespaceRangeBegin(), per namespace
+  mutable std::mutex cacheAccessMutex;  // protects namespaceBoundaryCache
+};
+
+template<class TConfig>
+int DirentLookup<TConfig>::compareWithDirentAt(char ns, const std::string& key, entry_index_type i) const
+{
+  // The namespace decides first; keys are only compared inside the same namespace.
+  const auto dirent = direntAccessor.getDirent(index_t(i));
+  if (ns != dirent->getNamespace()) return ns < dirent->getNamespace() ? -1 : 1;
+  return key.compare(TConfig::getDirentKey(*dirent));
+}
+
+template<class TConfig>
+class FastDirentLookup : public DirentLookup<TConfig>
+{
+  typedef DirentLookup<TConfig> BaseType;
+  typedef typename BaseType::DirentAccessor DirentAccessor;
+  typedef typename BaseType::index_t index_t;
+
+public: // functions
+  FastDirentLookup(const DirentAccessor* _direntAccessor, entry_index_type cacheEntryCount);  // fills lookupGrid with sampled dirent keys
+
+  typename BaseType::Result find(char ns, const std::string& key) const;  // like DirentLookup::find(), but first narrowed with lookupGrid
+
+private: // functions
+  std::string getDirentKey(entry_index_type i) const;  // namespace character followed by the key of the dirent i
+
+private: // data
+  using BaseType::direntAccessor;
+  using BaseType::direntCount;
+  NarrowDown lookupGrid;  // maps a full key to the range of dirent indexes to search
+};
+
+template<class TConfig>
+std::string FastDirentLookup<TConfig>::getDirentKey(entry_index_type i) const
+{
+  // Full lookup key of the dirent i: its namespace character followed by its key.
+  const auto dirent = direntAccessor.getDirent(index_t(i));
+  return std::string(1, dirent->getNamespace()) + TConfig::getDirentKey(*dirent);
+}
+
+template<class TConfig>
+DirentLookup<TConfig>::DirentLookup(const DirentAccessor* _direntAccessor)
+  : direntAccessor(*_direntAccessor)  // dereferenced right away: _direntAccessor must not be null
+  , direntCount(direntAccessor.getDirentCount())  // the count is read once, at construction
+{
+}
+
+template<class TConfig>
+FastDirentLookup<TConfig>::FastDirentLookup(const DirentAccessor* _direntAccessor, entry_index_type cacheEntryCount)
+  : BaseType(_direntAccessor)
+{
+  if ( direntCount )
+  {
+    // Sample about cacheEntryCount dirents, evenly spread over all of them. A
+    // cacheEntryCount of zero is treated as one to avoid a division by zero.
+    const entry_index_type step = std::max<entry_index_type>(1, direntCount/std::max<entry_index_type>(1, cacheEntryCount));
+    for ( entry_index_type i = 0; i < direntCount-1; i += step )
+        lookupGrid.add(getDirentKey(i), i, getDirentKey(i+1));
+    lookupGrid.close(getDirentKey(direntCount - 1), direntCount - 1);
+  }
+}
+
+// Returns the index of the first dirent whose namespace is >= ch (the dirent count if there is none).
+template<typename TDirentAccessor>
+entry_index_t getNamespaceBeginOffset(TDirentAccessor& direntAccessor, char ch)
+{
+  ASSERT(ch, >=, 32);
+  ASSERT(ch, <=, 127);
+  entry_index_type lower = 0;
+  entry_index_type upper = entry_index_type(direntAccessor.getDirentCount());
+  if (upper == 0) return entry_index_t(0);  // no dirent at all: do not try to read the dirent 0
+  // Binary search: the namespace at lower is < ch (or lower is 0), the one at upper is >= ch (or upper is the count).
+  const bool firstIsBefore = direntAccessor.getDirent(entry_index_t(0))->getNamespace() < ch;
+  while (upper - lower > 1)
+  {
+    entry_index_type m = lower + (upper - lower) / 2;
+    auto middle = direntAccessor.getDirent(entry_index_t(m));
+    if (middle->getNamespace() >= ch)
+      upper = m;
+    else
+      lower = m;
+  }
+  return entry_index_t(firstIsBefore ? upper : lower);
+}
+
+template<typename TDirentAccessor>
+entry_index_t getNamespaceEndOffset(TDirentAccessor& direntAccessor, char ch)
+{
+  ASSERT(ch, >=, 32);
+  ASSERT(ch, <, 127);
+  return getNamespaceBeginOffset(direntAccessor, ch+1);  // a namespace ends where the next character's namespace begins
+}
+
+
+
+template<class TConfig>
+typename DirentLookup<TConfig>::index_t
+DirentLookup<TConfig>::getNamespaceRangeBegin(char ch) const
+{
+  ASSERT(ch, >=, 32);
+  ASSERT(ch, <=, 127);
+
+  // Return the cached boundary if this namespace was already looked up.
+  std::unique_lock<std::mutex> guard(cacheAccessMutex);
+  const auto cached = namespaceBoundaryCache.find(ch);
+  if (cached != namespaceBoundaryCache.end())
+    return cached->second;
+  guard.unlock();
+
+  // Search without holding the lock, then publish the result in the cache.
+  auto boundary = getNamespaceBeginOffset(direntAccessor, ch);
+  guard.lock();
+  namespaceBoundaryCache[ch] = boundary;
+  return boundary;
+}
+
+template<class TConfig>
+typename DirentLookup<TConfig>::index_t
+DirentLookup<TConfig>::getNamespaceRangeEnd(char ns) const
+{
+  return getNamespaceRangeBegin(ns+1);  // the range of ns ends where the range of the next namespace character begins
+}
+
+template<typename TConfig>
+typename DirentLookup<TConfig>::Result
+FastDirentLookup<TConfig>::find(char ns, const std::string& key) const
+{
+  const auto r = lookupGrid.getRange(ns + key);  // index range given by the grid for the full key (namespace + key)
+  return BaseType::findInRange(r.begin, r.end, ns, key);  // the exact search is done by the base class in that range
+}
+
+template<typename TConfig>
+typename DirentLookup<TConfig>::Result
+DirentLookup<TConfig>::find(char ns, const std::string& key) const
+{
+  return findInRange(0, direntCount, ns, key);  // search among all the dirents: [0, direntCount)
+}
+
+template<typename TConfig>
+typename DirentLookup<TConfig>::Result
+DirentLookup<TConfig>::findInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const
+{
+  // Empty range: nothing can be found.
+  if ( l == u )
+      return Result(false, index_t(l));
+  // The key is at, or before, the first entry of the range.
+  const int firstCmp = compareWithDirentAt(ns, key, l);
+  if ( firstCmp <= 0 )
+      return Result(firstCmp == 0, index_t(l));
+  // The key is after the last entry of the range.
+  const entry_index_type last = u-1;
+  if ( compareWithDirentAt(ns, key, last) > 0 )
+      return Result(false, index_t(u));
+  // Here (entry at l) < ns/key <= (entry at last): search between them.
+  return binarySearchInRange(l, last, ns, key);
+}
+
+template<typename TConfig>
+typename DirentLookup<TConfig>::Result
+DirentLookup<TConfig>::binarySearchInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const
+{  // Searches ns/key in [l, u] (u included); returns {found, index of the first entry that is >= ns/key}
+  assert(l <= u && u < direntCount);  // preconditions, only checked when assert() is enabled
+  assert(compareWithDirentAt(ns, key, l) > 0);
+  assert(compareWithDirentAt(ns, key, u) <= 0);
+  // Invariant maintained by the binary search:
+  //    (entry at l) < (query entry ns/key) <= (entry at u)
+  while (true)
+  {
+    // compute p as the **upward rounded** average of l and u
+    const entry_index_type p = l + (u - l + 1) / 2;
+    const int c = compareWithDirentAt(ns, key, p);
+    if (c <= 0) { // (entry at l) < ns/key <= (entry at p) <= (entry at u)
+      if ( u == p ) {  // p reaches u only when u <= l+1: the range cannot shrink any further
+        return { c == 0, index_t(u) };
+      }
+      u = p;
+    } else {  // (entry at l) < (entry at p) < ns/key <= (entry at u)
+      l = p;
+    }
+  }
+}
+
+} // namespace zim
+
+#endif // ZIM_DIRENT_LOOKUP_H
diff --git a/src/direntreader.h b/src/direntreader.h
new file mode 100644
index 0000000..2dc84ed
--- /dev/null
+++ b/src/direntreader.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_DIRENTREADER_H
+#define ZIM_DIRENTREADER_H
+
+#include "_dirent.h"
+#include "reader.h"
+
+#include <memory>
+#include <mutex>
+#include <vector>
+
+namespace zim
+{
+
+// Unlike FileReader and MemoryReader (which read data from a file and memory,
+// respectively), DirentReader is a helper class that reads Dirents (rather
+// than reading from a Dirent).
+class DirentReader
+{
+public: // functions
+  explicit DirentReader(std::shared_ptr<const Reader> zimReader)
+    : mp_zimReader(zimReader)
+  {}
+
+  std::shared_ptr<const Dirent> readDirent(offset_t offset);  // reads the dirent located at offset in zimReader
+
+private: // functions
+  bool initDirent(Dirent& dirent, const Buffer& direntData) const;  // parses direntData into dirent; false if direntData is too short
+
+private: // data
+  std::shared_ptr<const Reader> mp_zimReader;
+  std::vector<char> m_buffer;  // scratch buffer reused by readDirent()
+  std::mutex m_bufferMutex;    // held by readDirent() while m_buffer is in use
+};
+
+} // namespace zim
+
+#endif // ZIM_DIRENTREADER_H
diff --git a/src/endian_tools.h b/src/endian_tools.h
new file mode 100644
index 0000000..e51a58c
--- /dev/null
+++ b/src/endian_tools.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ENDIAN_H
+#define ENDIAN_H
+
+#include <algorithm>
+#include <iostream>
+#include <zim/zim.h>
+
+namespace zim
+{
+
+template<typename T, size_t N>
+struct ToLittleEndianImpl;
+
+template<typename T>
+struct ToLittleEndianImpl<T, 2>{
+  // Stores the 16 low bits of d at dst[0..1], least significant byte first.
+  static void write(const T& d, char* dst) {
+    const uint16_t bits = static_cast<uint16_t>(d);
+    for (int i = 0; i < 2; ++i) dst[i] = static_cast<uint8_t>(bits >> (8*i));
+  }
+};
+
+template<typename T>
+struct ToLittleEndianImpl<T, 4>{
+  // Stores the 32 low bits of d at dst[0..3], least significant byte first.
+  static void write(const T& d, char* dst) {
+    const uint32_t bits = static_cast<uint32_t>(d);
+    for (int i = 0; i < 4; ++i) {
+      dst[i] = static_cast<uint8_t>(bits >> (8*i));
+    }
+  }
+};
+
+template<typename T>
+struct ToLittleEndianImpl<T, 8>{
+  // Serializes d as a 64-bit little-endian integer.
+  //
+  // The value is first converted to uint64_t; its eight bytes are then
+  // stored at dst[0..7] starting with the least significant one, so dst
+  // must have room for at least eight chars.
+  static void write(const T& d, char* dst) {
+    const uint64_t bits = static_cast<uint64_t>(d);
+    for (int i = 0; i < 8; ++i) {
+      dst[i] = static_cast<uint8_t>(bits >> (8*i));
+    }
+  }
+};
+
+////////////////////////////////////////////////////////////////////////
+template <typename T>
+inline void toLittleEndian(T d, char* dst) // writes sizeof(T) bytes of d to dst, least significant first
+{
+  ToLittleEndianImpl<T, sizeof(T)>::write(d, dst); // only the 2-, 4- and 8-byte specializations are defined
+}
+
+template <typename T>
+inline T fromLittleEndian(const char* ptr)
+{
+  // Assemble the value byte by byte: ptr[0] is the least significant byte.
+  T value = 0;
+  for (size_t byte = 0; byte < sizeof(T); ++byte)
+    value |= (static_cast<T>(static_cast<uint8_t>(ptr[byte])) << (byte*8));
+  return value;
+}
+
+}
+
+#endif // ENDIAN_H
+
diff --git a/src/entry.cpp b/src/entry.cpp
new file mode 100644
index 0000000..717d45e
--- /dev/null
+++ b/src/entry.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2021 Renaud Gaudin <rgaudin@gmail.com>
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/entry.h>
+#include <zim/error.h>
+#include <zim/item.h>
+#include "_dirent.h"
+#include "fileimpl.h"
+#include "file_part.h"
+#include "log.h"
+
+#include <sstream>
+
+log_define("zim.entry")
+
+using namespace zim;
+
+Entry::Entry(std::shared_ptr<FileImpl> file, entry_index_type idx)
+  : m_file(file),
+    m_idx(idx),
+    m_dirent(file->getDirent(entry_index_t(idx))) // the dirent is looked up once, at construction
+{}
+
+std::string Entry::getTitle() const
+{
+  return m_dirent->getTitle(); // forwarded from the dirent fetched by the constructor
+}
+
+std::string Entry::getPath() const
+{
+  // With the new namespace scheme the bare url is the path; otherwise
+  // the long url is used.
+  if (!m_file->hasNewNamespaceScheme())
+    return m_dirent->getLongUrl();
+  return m_dirent->getUrl();
+}
+
+bool Entry::isRedirect() const
+{
+  return m_dirent->isRedirect(); // forwarded from the dirent fetched by the constructor
+}
+
+Item Entry::getItem(bool follow) const
+{
+  // A plain entry maps directly onto its item.
+  if (!isRedirect())
+    return Item(m_file, m_idx);
+  // A redirect is only resolved when the caller asked to follow it.
+  if (follow)
+    return getRedirect();
+
+  std::ostringstream sstream;
+  sstream << "Entry " << getPath() << " is a redirect entry.";
+  throw InvalidType(sstream.str());
+}
+
+Item Entry::getRedirect() const {
+  // Walk the redirect chain; the hop counter bounds how far it is followed.
+  auto target = getRedirectEntry();
+  for (auto hopsLeft = 50U; target.isRedirect() && --hopsLeft; ) {
+    target = target.getRedirectEntry();
+  }
+  return target.getItem(false);
+}
+
+entry_index_type Entry::getRedirectEntryIndex() const  {
+  // Only a redirect entry has a redirect index; anything else is an error.
+  if (isRedirect())
+    return m_dirent->getRedirectIndex().v;
+  std::ostringstream sstream;
+  sstream << "Entry " << getPath() << " is not a redirect entry.";
+  throw InvalidType(sstream.str());
+}
+
+Entry Entry::getRedirectEntry() const  {
+  return Entry(m_file, getRedirectEntryIndex()); // throws InvalidType when this entry is not a redirect
+}
diff --git a/src/envvalue.cpp b/src/envvalue.cpp
new file mode 100644
index 0000000..1d5c64f
--- /dev/null
+++ b/src/envvalue.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <sstream>
+#include <stdlib.h>
+
+namespace zim
+{
+  // Returns environment variable `env` parsed as unsigned, or `def` when it
+  // is unset or not a number (a failed extraction would store 0 in `def`).
+  unsigned envValue(const char* env, unsigned def)
+  {
+    const char* v = ::getenv(env);
+    unsigned parsed = 0;
+    if (v && (std::istringstream(v) >> parsed))
+      def = parsed;
+    return def;
+  }
+
+  // Reads a memory size from environment variable `env`: an unsigned number
+  // optionally followed by k/K, m/M or g/G. Returns `def` when unset/invalid.
+  unsigned envMemSize(const char* env, unsigned def)
+  {
+    const char* v = ::getenv(env);
+    unsigned value = 0;
+    char unit = '\0';
+    std::istringstream s(v ? v : "");
+    // A failed extraction stores 0 in its target (C++11), so parse into a
+    // temporary and keep the caller's default unless a number was read.
+    if (!(s >> value))
+      return def;
+    s >> unit; // optional suffix; unit stays '\0' when absent
+    switch (unit)
+    {
+      case 'k': case 'K': value *= 1024; break;
+      case 'm': case 'M': value *= 1024 * 1024; break;
+      case 'g': case 'G': value *= 1024 * 1024 * 1024; break;
+    }
+    return value;
+  }
+}
+
diff --git a/src/envvalue.h b/src/envvalue.h
new file mode 100644
index 0000000..d6dffd4
--- /dev/null
+++ b/src/envvalue.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_ENVVALUE_H
+#define ZIM_ENVVALUE_H
+
+namespace zim
+{
+  unsigned envValue(const char* env, unsigned def);   // unsigned value of environment variable env; def when it is unset
+  unsigned envMemSize(const char* env, unsigned def); // same, scaled by an optional k/K, m/M or g/G suffix (powers of 1024)
+}
+
+#endif // ZIM_ENVVALUE_H
diff --git a/src/file_compound.cpp b/src/file_compound.cpp
new file mode 100644
index 0000000..a8f6bf1
--- /dev/null
+++ b/src/file_compound.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "file_compound.h"
+#include "buffer.h"
+
+#include <errno.h>
+#include <string.h>
+#include <sstream>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+#  include <io.h>
+#else
+#  include <unistd.h>
+#endif
+
+namespace zim {
+
+void FileCompound::addPart(FilePart* fpart)
+{
+  const Range newRange(offset_t(_fsize.v), offset_t((_fsize+fpart->size()).v)); // starts at the current total size
+  emplace(newRange, fpart); // the raw pointer is stored as-is; ~FileCompound() deletes it
+  _fsize += fpart->size();
+}
+
+FileCompound::FileCompound(const std::string& filename):
+  _filename(filename),
+  _fsize(0)
+{
+  try {
+    addPart(new FilePart(filename));
+  } catch(...) {
+    // Not openable as a single file: look for split parts named
+    // <filename>aa, <filename>ab, ... and stop at the first one that fails.
+    int errnoSave = errno;
+    _fsize = zsize_t(0);
+    try {
+      for (char ch0 = 'a'; ch0 <= 'z'; ++ch0)
+      {
+        const std::string fname0 = filename + ch0;
+        for (char ch1 = 'a'; ch1 <= 'z'; ++ch1)
+          addPart(new FilePart(fname0 + ch1));
+      }
+    } catch (...) { }
+
+    if (empty())
+    {
+      std::ostringstream msg;
+      msg << "error " << errnoSave << " opening file \"" << filename << "\"";
+      throw std::runtime_error(msg.str());
+    }
+  }
+}
+
+#ifndef _WIN32
+FileCompound::FileCompound(int fd):
+  _filename(), // left empty: no name is recorded when opening from a descriptor
+  _fsize(0)
+{
+  addPart(new FilePart(fd)); // a descriptor always yields exactly one part
+}
+#endif
+
+FileCompound::~FileCompound() {
+  // The map stores owning raw pointers: release every part.
+  for (auto& rangeAndPart : *this) {
+    delete rangeAndPart.second;
+  }
+}
+
+time_t FileCompound::getMTime() const {
+  if (mtime || empty()) // a non-zero value is a cached result; with no part there is nothing to stat
+    return mtime;
+
+  const char* fname = begin()->second->filename().c_str(); // only the first part is examined
+
+  #if defined(HAVE_STAT64) && ! defined(__APPLE__)
+    struct stat64 st;
+    int ret = ::stat64(fname, &st);
+  #else
+    struct stat st;
+    int ret = ::stat(fname, &st);
+  #endif
+  if (ret != 0)
+  {
+    std::ostringstream msg;
+    msg << "stat failed with errno " << errno << " : " << strerror(errno);
+    throw std::runtime_error(msg.str());
+  }
+  mtime = st.st_mtime; // cached for later calls (mtime is declared mutable)
+
+  return mtime;
+}
+
+} // zim
diff --git a/src/file_compound.h b/src/file_compound.h
new file mode 100644
index 0000000..47b036e
--- /dev/null
+++ b/src/file_compound.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FILE_COMPOUND_H_
+#define ZIM_FILE_COMPOUND_H_
+
+#include "file_part.h"
+#include "zim_types.h"
+#include "debug.h"
+#include <map>
+#include <memory>
+#include <cstdio>
+
+namespace zim {
+
+struct Range { // pair of offsets used as the key of FileCompound's part map
+  Range(const offset_t  min, const offset_t max)
+    : min(min), max(max)
+  {
+      // ASSERT(min, <, max);  (disabled: FileCompound::locate() builds ranges with min == max)
+  }
+
+  const offset_t min; // start of the range
+  const offset_t max; // end of the range; less_range compares it with <= against another range's min
+};
+
+// Map comparator for Range; no std::binary_function base (removed in C++17).
+struct less_range {
+  bool operator()(const Range& lhs, const Range& rhs) const {
+    return lhs.min < rhs.min && lhs.max <= rhs.min; // lhs lies entirely before rhs
+  }
+};
+
+class FileCompound : private std::map<Range, FilePart*, less_range> { // parts keyed by the offset range they cover
+    typedef std::map<Range, FilePart*, less_range> ImplType;
+
+  public: // types
+    typedef const_iterator PartIterator;
+    typedef std::pair<PartIterator, PartIterator> PartRange;
+
+  public: // functions
+    explicit FileCompound(const std::string& filename);
+
+#ifndef _WIN32
+    explicit FileCompound(int fd);
+#endif
+
+    ~FileCompound(); // deletes every FilePart stored in the map
+
+    using ImplType::begin;
+    using ImplType::end;
+
+    const std::string& filename() const { return _filename; }
+    zsize_t fsize() const { return _fsize; }; // sum of the sizes of all parts
+    time_t getMTime() const;
+    bool fail() const { return empty(); };
+    bool is_multiPart() const { return size() > 1; };
+
+    // Returns the part containing `offset`; asserts that one exists.
+    PartIterator locate(offset_t offset) const {
+      const PartIterator partIt = lower_bound(Range(offset, offset));
+      ASSERT(partIt != end(), ==, true);
+      return partIt;
+    }
+
+    PartRange locate(offset_t offset, zsize_t size) const { // parts overlapping [offset, offset+size)
+#if ! defined(__APPLE__)
+        return equal_range(Range(offset, offset+size));
+#else
+        // Workaround for https://github.com/openzim/libzim/issues/398
+        // Under MacOS the implementation of std::map::equal_range() makes
+        // assumptions about the properties of the key comparison function and
+        // abuses the std::map requirement that it must contain unique keys. As
+        // a result, when a map m is queried with an element k that is
+        // equivalent to more than one keys present in m,
+        // m.equal_range(k).first may be different from m.lower_bound(k) (the
+        // latter one returning the correct result).
+        const Range queryRange(offset, offset+size);
+        return {lower_bound(queryRange), upper_bound(queryRange)};
+#endif // ! defined(__APPLE__)
+    }
+
+  private: // functions
+    void addPart(FilePart* fpart);
+
+  private: // data
+    std::string _filename;
+    zsize_t _fsize;
+    mutable time_t mtime = 0; // cache for getMTime(); 0 = not computed yet (no constructor sets it)
+};
+
+
+};
+
+
+#endif //ZIM_FILE_COMPOUND_H_
diff --git a/src/file_part.h b/src/file_part.h
new file mode 100644
index 0000000..6362baf
--- /dev/null
+++ b/src/file_part.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FILE_PART_H_
+#define ZIM_FILE_PART_H_
+
+#include <string>
+#include <cstdio>
+#include <memory>
+
+#include <zim/zim.h>
+
+#include "zim_types.h"
+#include "fs.h"
+
+namespace zim {
+
+class FilePart { // one physical file of an archive: its name, an open handle and its size
+  typedef DEFAULTFS FS;
+
+  public:
+    using FDSharedPtr = std::shared_ptr<FS::FD>;
+
+  public:
+    FilePart(const std::string& filename) :
+        m_filename(filename),
+        m_fhandle(std::make_shared<FS::FD>(FS::openFile(filename))),
+        m_size(m_fhandle->getSize()) {} // relies on m_fhandle being declared (hence initialized) before m_size
+
+#ifndef _WIN32
+    FilePart(int fd) :
+        FilePart(getFilePathFromFD(fd)) {} // resolves fd to a path and opens that path; fd itself is not stored
+#endif
+
+    ~FilePart() = default;
+    const std::string& filename() const { return m_filename; };
+    const FS::FD& fhandle() const { return *m_fhandle; };
+    const FDSharedPtr& shareable_fhandle() const { return m_fhandle; }; // lets a caller share ownership of the handle
+
+    zsize_t size() const { return m_size; };
+    bool fail() const { return !m_size; }; // "failed" here means the size is zero
+    bool good() const { return bool(m_size); };
+
+  private:
+    const std::string m_filename;
+    FDSharedPtr m_fhandle;
+    zsize_t m_size; // value of getSize() on the handle, read once in the constructor
+};
+
+};
+
+#endif //ZIM_FILE_PART_H_
diff --git a/src/file_reader.cpp b/src/file_reader.cpp
new file mode 100644
index 0000000..74cbc3a
--- /dev/null
+++ b/src/file_reader.cpp
@@ -0,0 +1,293 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/zim.h>
+#include <zim/error.h>
+#include "file_reader.h"
+#include "file_compound.h"
+#include "buffer.h"
+#include <errno.h>
+#include <string.h>
+#include <cstring>
+#include <fcntl.h>
+#include <sstream>
+#include <system_error>
+#include <algorithm>
+
+
+#ifndef _WIN32
+#  include <sys/mman.h>
+#  include <unistd.h>
+#endif
+
+#if defined(_MSC_VER)
+# include <io.h>
+# include <BaseTsd.h>
+  typedef SSIZE_T ssize_t;
+#endif
+
+namespace zim {
+
+////////////////////////////////////////////////////////////////////////////////
+// MultiPartFileReader
+////////////////////////////////////////////////////////////////////////////////
+
+MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> source)
+  : MultiPartFileReader(source, offset_t(0), source->fsize()) {} // whole compound: offset 0, full size
+
+MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size)
+  : source(source),
+    _offset(offset), // start of this reader's window inside the compound
+    _size(size)      // length of that window
+{
+  ASSERT(offset.v, <=, source->fsize().v);
+  ASSERT(offset.v+size.v, <=, source->fsize().v); // the window must fit inside the compound
+}
+
+char MultiPartFileReader::read(offset_t offset) const { // reads the single byte at `offset` (relative to this reader)
+  ASSERT(offset.v, <, _size.v);
+  offset += _offset; // from here on `offset` is relative to the whole compound
+  auto part_pair = source->locate(offset);
+  auto& fhandle = part_pair->second->fhandle();
+  offset_t local_offset = offset - part_pair->first.min; // position inside the located part
+  ASSERT(local_offset, <=, part_pair->first.max);
+  char ret;
+  try {
+    fhandle.readAt(&ret, zsize_t(1), local_offset);
+  } catch (std::runtime_error& e) {
+    // Re-thrown as std::system_error carrying errno and a description of the failing part.
+    std::ostringstream s;
+    s << "Cannot read a char.\n";
+    s << " - File part is " <<  part_pair->second->filename() << "\n";
+    s << " - File part size is " << part_pair->second->size().v << "\n";
+    s << " - File part range is " << part_pair->first.min << "-" << part_pair->first.max << "\n";
+    s << " - Reading offset at " << offset.v << "\n";
+    s << " - local offset is " << local_offset.v << "\n";
+    s << " - error is " << strerror(errno) << "\n";
+    std::error_code ec(errno, std::generic_category());
+    throw std::system_error(ec, s.str());
+  };
+  return ret;
+}
+
+void MultiPartFileReader::read(char* dest, offset_t offset, zsize_t size) const { // copies `size` bytes at `offset` into dest
+  ASSERT(offset.v, <=, _size.v);
+  ASSERT(offset.v+size.v, <=, _size.v);
+  if (! size ) {
+    return; // nothing to copy
+  }
+  offset += _offset; // from here on `offset` is relative to the whole compound
+  auto found_range = source->locate(offset, size);
+  for(auto current = found_range.first; current!=found_range.second; current++){ // one iteration per part touched
+    auto part = current->second;
+    Range partRange = current->first;
+    offset_t local_offset = offset-partRange.min; // position inside this part
+    ASSERT(size.v, >, 0U);
+    zsize_t size_to_get = zsize_t(std::min(size.v, part->size().v-local_offset.v)); // at most up to the end of this part
+    try {
+      part->fhandle().readAt(dest, size_to_get, local_offset);
+    } catch (std::runtime_error& e) {
+      std::ostringstream s;
+      s << "Cannot read chars.\n";
+      s << " - File part is " <<  part->filename() << "\n";
+      s << " - File part size is " << part->size().v << "\n";
+      s << " - File part range is " << partRange.min << "-" << partRange.max << "\n";
+      s << " - size_to_get is " << size_to_get.v << "\n";
+      s << " - total size is " << size.v << "\n";
+      s << " - Reading offset at " << offset.v << "\n";
+      s << " - local offset is " << local_offset.v << "\n";
+      s << " - error is " << strerror(errno) << "\n";
+      std::error_code ec(errno, std::generic_category());
+      throw std::system_error(ec, s.str());
+    };
+    ASSERT(size_to_get, <=, size);
+    dest += size_to_get.v; // advance the output cursor,
+    size -= size_to_get;   // the remaining byte count
+    offset += size_to_get; // and the read position
+  }
+  ASSERT(size.v, ==, 0U); // every requested byte must have been served by some part
+}
+
+#ifdef ENABLE_USE_MMAP
+namespace
+{
+
+class MMapException : std::exception {}; // thrown when a range cannot be mmapped and must be copied instead
+
+char* // start address of the new mapping; throws std::runtime_error when mmap() fails
+mmapReadOnly(int fd, offset_type offset, size_type size)
+{
+#if defined(__APPLE__) || defined(__OpenBSD__)
+  const auto MAP_FLAGS = MAP_PRIVATE; // no extra flag is used on these platforms
+#elif defined(__FreeBSD__)
+  const auto MAP_FLAGS = MAP_PRIVATE|MAP_PREFAULT_READ;
+#else
+  const auto MAP_FLAGS = MAP_PRIVATE|MAP_POPULATE; // NOTE(review): MAP_POPULATE is Linux-specific - confirm other targets define it
+#endif
+
+  const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset); // read-only, address chosen by the kernel
+  if (p == MAP_FAILED )
+  {
+    std::ostringstream s;
+    s << "Cannot mmap size " << size << " at off " << offset
+      << " : " << strerror(errno);
+    throw std::runtime_error(s.str());
+  }
+  return p;
+}
+
+Buffer::DataPtr // points at the requested bytes; its deleter unmaps the whole mapping
+makeMmappedBuffer(int fd, offset_t offset, zsize_t size)
+{
+  const offset_type pageAlignedOffset(offset.v & ~(sysconf(_SC_PAGE_SIZE) - 1)); // round down to a page boundary
+  const size_t alignmentAdjustment = offset.v - pageAlignedOffset; // bytes mapped in front of the requested data
+  size += alignmentAdjustment; // the mapping must still reach the end of the requested range
+
+#if !MMAP_SUPPORT_64
+  if(pageAlignedOffset >= INT32_MAX) {
+    throw MMapException(); // offset not usable without 64-bit mmap support; callers are expected to copy instead
+  }
+#endif
+  char* const mmappedAddress = mmapReadOnly(fd, pageAlignedOffset, size.v);
+  const auto munmapDeleter = [mmappedAddress, size](char* ) { // ignores its argument: unmaps from the aligned start
+                               munmap(mmappedAddress, size.v);
+                             };
+
+  return Buffer::DataPtr(mmappedAddress+alignmentAdjustment, munmapDeleter);
+}
+
+} // unnamed namespace
+#endif // ENABLE_USE_MMAP
+
+const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) const { // mmapped if in one part, else copied
+  ASSERT(size, <=, _size);
+#ifdef ENABLE_USE_MMAP
+  try {
+    auto found_range = source->locate(_offset+offset, size);
+    auto first_part_containing_it = found_range.first;
+    if (++first_part_containing_it != found_range.second) {
+      throw MMapException(); // the range spans several parts: take the copying path below
+    }
+
+    // The range is in only one part
+    auto range = found_range.first->first;
+    auto part = found_range.first->second;
+    auto local_offset = offset + _offset - range.min; // position inside that part
+    ASSERT(size, <=, part->size());
+    int fd = part->fhandle().getNativeHandle();
+    return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size);
+  } catch(MMapException& e)
+#endif
+  {
+    // The range is several part, or we are on Windows.
+    // We will have to do some memory copies :/
+    // [TODO] Use Windows equivalent for mmap.
+    auto ret_buffer = Buffer::makeBuffer(size);
+    read(const_cast<char*>(ret_buffer.data()), offset, size); // read() adds _offset itself, so pass the relative offset
+    return ret_buffer;
+  }
+}
+
+// Overflow-safe bounds check: subtracting avoids computing offset.v+size.v, which can wrap.
+bool Reader::can_read(offset_t offset, zsize_t size) const
+{
+    return offset.v <= this->size().v && size.v <= this->size().v - offset.v;
+}
+
+std::unique_ptr<const Reader> MultiPartFileReader::sub_reader(offset_t offset, zsize_t size) const
+{
+  ASSERT(offset.v+size.v, <=, _size.v); // the sub-range must lie inside this reader
+  // TODO: can use a FileReader here if the new range fully belongs to a single part
+  return std::unique_ptr<Reader>(new MultiPartFileReader(source, _offset+offset, size)); // shares the same compound
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// FileReader
+////////////////////////////////////////////////////////////////////////////////
+
+FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size)
+  : _fhandle(fh)    // shared file handle
+  , _offset(offset) // added to every offset passed to read()/get_buffer()
+  , _size(size)     // length of the readable window
+{
+}
+
+char FileReader::read(offset_t offset) const // reads the single byte at `offset` (relative to this reader)
+{
+  ASSERT(offset.v, <, _size.v);
+  offset += _offset; // from here on `offset` is a position in the underlying file
+  char ret;
+  try {
+    _fhandle->readAt(&ret, zsize_t(1), offset);
+  } catch (std::runtime_error& e) {
+    // Re-thrown as std::system_error carrying errno and the failing offset.
+    std::ostringstream s;
+    s << "Cannot read a char.\n";
+    s << " - Reading offset at " << offset.v << "\n";
+    s << " - error is " << strerror(errno) << "\n";
+    std::error_code ec(errno, std::generic_category());
+    throw std::system_error(ec, s.str());
+  };
+  return ret;
+}
+
+void FileReader::read(char* dest, offset_t offset, zsize_t size) const // copies `size` bytes at `offset` into dest
+{
+  ASSERT(offset.v, <=, _size.v);
+  ASSERT(offset.v+size.v, <=, _size.v);
+  if (! size ) {
+    return; // nothing to copy
+  }
+  offset += _offset; // from here on `offset` is a position in the underlying file
+  try {
+    _fhandle->readAt(dest, size, offset);
+  } catch (std::runtime_error& e) {
+    std::ostringstream s; // re-thrown below as std::system_error with errno and the request details
+    s << "Cannot read chars.\n";
+    s << " - Reading offset at " << offset.v << "\n";
+    s << " - size is " << size.v << "\n";
+    s << " - error is " << strerror(errno) << "\n";
+    std::error_code ec(errno, std::generic_category());
+    throw std::system_error(ec, s.str());
+  };
+}
+
+// Returns `size` bytes at `offset`: mmapped when possible, copied otherwise.
+const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const
+{
+  ASSERT(size, <=, _size);
+#ifdef ENABLE_USE_MMAP
+  try { // makeMmappedBuffer() throws MMapException when the offset is not mappable
+    return Buffer::makeBuffer(makeMmappedBuffer(_fhandle->getNativeHandle(), _offset + offset, size), size);
+  } catch (MMapException&) {} // fall through to the copying path below
+#endif // Without mmap (e.g. Windows) always copy. [TODO] Use Windows equivalent for mmap.
+  auto ret_buffer = Buffer::makeBuffer(size);
+  read(const_cast<char*>(ret_buffer.data()), offset, size); // read() adds _offset itself
+  return ret_buffer;
+}
+
+std::unique_ptr<const Reader> // a new FileReader over [offset, offset+size) of this one
+FileReader::sub_reader(offset_t offset, zsize_t size) const
+{
+  ASSERT(offset.v+size.v, <=, _size.v); // the sub-range must lie inside this reader
+  return std::unique_ptr<const Reader>(new FileReader(_fhandle, _offset + offset, size)); // shares the file handle
+}
+
+} // zim
diff --git a/src/file_reader.h b/src/file_reader.h
new file mode 100644
index 0000000..36c3a74
--- /dev/null
+++ b/src/file_reader.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FILE_READER_H_
+#define ZIM_FILE_READER_H_
+
+#include "reader.h"
+#include "fs.h"
+
+namespace zim {
+
+class FileCompound;
+
+class FileReader : public Reader { // Reader over a window [offset, offset+size) of one file handle
+  public: // types
+    typedef std::shared_ptr<const DEFAULTFS::FD> FileHandle;
+
+  public: // functions
+    explicit FileReader(FileHandle fh, offset_t offset, zsize_t size);
+    ~FileReader() = default;
+
+    zsize_t size() const { return _size; };
+    offset_t offset() const { return _offset; };
+
+    char read(offset_t offset) const; // single byte; offset is relative to this reader
+    void read(char* dest, offset_t offset, zsize_t size) const; // copies size bytes into dest
+    const Buffer get_buffer(offset_t offset, zsize_t size) const;
+
+    std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
+
+  private: // data
+    // The file handle is stored via a shared pointer so that it can be shared
+    // by a sub_reader (otherwise the file handle would be invalidated by
+    // FD destructor when the sub-reader is destroyed).
+    FileHandle _fhandle;
+    offset_t _offset; // start of the window inside the file
+    zsize_t _size;    // length of the window
+};
+
+class MultiPartFileReader : public Reader { // Reader over a window of a FileCompound (possibly several parts)
+  public:
+    MultiPartFileReader(std::shared_ptr<const FileCompound> source); // covers the whole compound
+    ~MultiPartFileReader() {};
+
+    zsize_t size() const { return _size; };
+    offset_t offset() const { return _offset; };
+
+    char read(offset_t offset) const; // single byte; offset is relative to this reader
+    void read(char* dest, offset_t offset, zsize_t size) const; // copies size bytes into dest
+    const Buffer get_buffer(offset_t offset, zsize_t size) const;
+
+    std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
+
+  private:
+    MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size); // used by sub_reader()
+
+    std::shared_ptr<const FileCompound> source;
+    offset_t _offset; // start of the window inside the compound
+    zsize_t _size;    // length of the window
+};
+
+};
+
+#endif // ZIM_FILE_READER_H_
diff --git a/src/fileheader.cpp b/src/fileheader.cpp
new file mode 100644
index 0000000..e985094
--- /dev/null
+++ b/src/fileheader.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2008 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "fileheader.h"
+#include <zim/error.h>
+#include <iostream>
+#include <algorithm>
+#include "log.h"
+#include "endian_tools.h"
+#include "reader.h"
+#include "bufferstreamer.h"
+#include "buffer.h"
+#ifdef _WIN32
+# include "io.h"
+#else
+# include "unistd.h"
+# define _write(fd, addr, size) ::write((fd), (addr), (size))
+#endif
+
+log_define("zim.file.header")
+
+namespace zim
+{
+  const uint32_t Fileheader::zimMagic = 0x044d495a; // ="ZIM^d"; first field written by write() and checked by read()
+  const uint16_t Fileheader::zimOldMajorVersion = 5;  // still accepted by read()
+  const uint16_t Fileheader::zimMajorVersion = 6;  // accepted by read(); default major version of a new Fileheader
+  const uint16_t Fileheader::zimMinorVersion = 1;  // default minor version of a new Fileheader
+  const offset_type Fileheader::size = 80; // This is also mimeListPos (so an offset)
+
+  void Fileheader::write(int out_fd) const
+  {  // Serialize the header into a Fileheader::size-byte buffer and write it to out_fd.
+    char header[Fileheader::size];
+    toLittleEndian(Fileheader::zimMagic, header);  // each field goes to a fixed byte offset
+    toLittleEndian(getMajorVersion(), header + 4);
+    toLittleEndian(getMinorVersion(), header + 6);
+    std::copy(getUuid().data, getUuid().data + sizeof(Uuid), header + 8);
+    toLittleEndian(getArticleCount(), header + 24);
+    toLittleEndian(getClusterCount(), header + 28);
+    toLittleEndian(getUrlPtrPos(), header + 32);
+    toLittleEndian(getTitleIdxPos(), header + 40);
+    toLittleEndian(getClusterPtrPos(), header + 48);
+    toLittleEndian(getMimeListPos(), header + 56);
+    toLittleEndian(getMainPage(), header + 64);
+    toLittleEndian(getLayoutPage(), header + 68);
+    toLittleEndian(getChecksumPos(), header + 72);
+    // Anything other than a complete write (error or short write) throws std::runtime_error.
+    auto ret = _write(out_fd, header, Fileheader::size);
+    if (ret != Fileheader::size) {
+      std::cerr << "Error Writing" << std::endl;
+      std::cerr << "Ret is " << ret << std::endl;
+      perror("Error writing");
+      throw std::runtime_error("Error writing");
+    }
+  }
+
+  void Fileheader::read(const Reader& reader)
+  {  // Parse the first Fileheader::size bytes of `reader` into this object.
+    auto buffer = reader.get_buffer(offset_t(0), zsize_t(Fileheader::size));
+    auto seqReader = BufferStreamer(buffer);
+    uint32_t magicNumber = seqReader.read<uint32_t>();
+    if (magicNumber != Fileheader::zimMagic)
+    {
+      log_error("invalid magic number " << magicNumber << " found - "
+          << Fileheader::zimMagic << " expected");
+      throw ZimFileFormatError("Invalid magic number");
+    }
+    // Both the old and the current major version are accepted.
+    uint16_t major_version = seqReader.read<uint16_t>();
+    if (major_version != zimOldMajorVersion && major_version != zimMajorVersion)
+    {
+      log_error("invalid zimfile major version " << major_version << " found - "
+          << Fileheader::zimMajorVersion << " expected");
+      throw ZimFileFormatError("Invalid version");
+    }
+    setMajorVersion(major_version);
+
+    setMinorVersion(seqReader.read<uint16_t>());
+    // The UUID is copied as 16 raw bytes, then skipped in the stream.
+    Uuid uuid;
+    std::copy(seqReader.current(), seqReader.current()+16, uuid.data);
+    seqReader.skip(zsize_t(16));
+    setUuid(uuid);
+    // The remaining fields are read in the same order write() stores them.
+    setArticleCount(seqReader.read<uint32_t>());
+    setClusterCount(seqReader.read<uint32_t>());
+    setUrlPtrPos(seqReader.read<uint64_t>());
+    setTitleIdxPos(seqReader.read<uint64_t>());
+    setClusterPtrPos(seqReader.read<uint64_t>());
+    setMimeListPos(seqReader.read<uint64_t>());
+    setMainPage(seqReader.read<uint32_t>());
+    setLayoutPage(seqReader.read<uint32_t>());
+    setChecksumPos(seqReader.read<uint64_t>());
+    // Throws ZimFileFormatError if the parsed values are inconsistent.
+    sanity_check();
+  }
+
+  void Fileheader::sanity_check() const {  // Throws ZimFileFormatError on the first inconsistency found.
+    if (!!articleCount != !!clusterCount) {
+      throw ZimFileFormatError("No article <=> No cluster");
+    }
+    // NOTE(review): 72 is accepted too, although the message only mentions 80.
+    if (mimeListPos != size && mimeListPos != 72) {
+      throw ZimFileFormatError("mimelistPos must be 80.");
+    }
+    // NOTE(review): the checks below accept equality although the messages say ">".
+    if (urlPtrPos < mimeListPos) {
+      throw ZimFileFormatError("urlPtrPos must be > mimelistPos.");
+    }
+    if (titleIdxPos < mimeListPos) {
+      throw ZimFileFormatError("titleIdxPos must be > mimelistPos.");
+    }
+    if (clusterPtrPos < mimeListPos) {
+      throw ZimFileFormatError("clusterPtrPos must be > mimelistPos.");
+    }
+
+    if (clusterCount > articleCount) {
+      throw ZimFileFormatError("Cluster count cannot be higher than article count.");
+    }
+    // A checksumPos of 0 passes without being compared to mimeListPos.
+    if (checksumPos != 0 && checksumPos < mimeListPos) {
+      throw ZimFileFormatError("checksumPos must be > mimeListPos.");
+    }
+  }
+
+}
diff --git a/src/fileheader.h b/src/fileheader.h
new file mode 100644
index 0000000..95be691
--- /dev/null
+++ b/src/fileheader.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2008 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FILEHEADER_H
+#define ZIM_FILEHEADER_H
+
+#include <memory>
+#include <zim/zim.h>
+#include <zim/uuid.h>
+#include <iosfwd>
+#include <limits>
+
+// max may be defined as a macro by Windows includes
+#ifdef max
+#undef max
+#endif
+
+namespace zim
+{
+  class Reader;
+  class Fileheader  // In-memory form of the fixed-size ZIM file header.
+  {
+    public:
+      static const uint32_t zimMagic;
+      static const uint16_t zimOldMajorVersion;
+      static const uint16_t zimMajorVersion;
+      static const uint16_t zimMinorVersion;
+      static const size_type size;
+
+    private:
+      uint16_t majorVersion;
+      uint16_t minorVersion;
+      Uuid uuid;
+      entry_index_type articleCount;
+      offset_type titleIdxPos;
+      offset_type urlPtrPos;
+      offset_type mimeListPos;
+      cluster_index_type clusterCount;
+      offset_type clusterPtrPos;
+      entry_index_type mainPage;
+      entry_index_type layoutPage;
+      offset_type checksumPos;
+
+    public:
+      Fileheader()
+        : majorVersion(zimMajorVersion),
+          minorVersion(zimMinorVersion),
+          articleCount(0),
+          titleIdxPos(0),
+          urlPtrPos(0),
+          mimeListPos(0),  // explicit default so hasChecksum() never reads an indeterminate value
+          clusterCount(0),
+          clusterPtrPos(0),
+          mainPage(std::numeric_limits<entry_index_type>::max()),  // max() means "no main page"
+          layoutPage(std::numeric_limits<entry_index_type>::max()),  // max() means "no layout page"
+          checksumPos(std::numeric_limits<offset_type>::max())
+      {}
+
+      void write(int out_fd) const;
+      void read(const Reader& reader);
+
+      // Sanity-check the fields; throws ZimFileFormatError if something is wrong.
+      void sanity_check() const;
+
+      uint16_t getMajorVersion() const             { return majorVersion; }
+      void setMajorVersion(uint16_t v)             { majorVersion = v; }
+
+      uint16_t getMinorVersion() const             { return minorVersion; }
+      void setMinorVersion(uint16_t v)             { minorVersion = v; }
+
+      const Uuid& getUuid() const                  { return uuid; }
+      void setUuid(const Uuid& uuid_)              { uuid = uuid_; }
+
+      entry_index_type getArticleCount() const            { return articleCount; }
+      void      setArticleCount(entry_index_type s)       { articleCount = s; }
+
+      offset_type getTitleIdxPos() const           { return titleIdxPos; }
+      void        setTitleIdxPos(offset_type p)    { titleIdxPos = p; }
+
+      offset_type getUrlPtrPos() const             { return urlPtrPos; }
+      void        setUrlPtrPos(offset_type p)      { urlPtrPos = p; }
+
+      offset_type getMimeListPos() const           { return mimeListPos; }
+      void        setMimeListPos(offset_type p)    { mimeListPos = p; }
+
+      cluster_index_type   getClusterCount() const          { return clusterCount; }
+      void        setClusterCount(cluster_index_type s)     { clusterCount = s; }
+
+      offset_type getClusterPtrPos() const         { return clusterPtrPos; }
+      void        setClusterPtrPos(offset_type p)  { clusterPtrPos = p; }
+
+      bool        hasMainPage() const              { return mainPage != std::numeric_limits<entry_index_type>::max(); }
+      entry_index_type   getMainPage() const     { return mainPage; }
+      void        setMainPage(entry_index_type s){ mainPage = s; }
+
+      bool        hasLayoutPage() const            { return layoutPage != std::numeric_limits<entry_index_type>::max(); }
+      entry_index_type   getLayoutPage() const   { return layoutPage; }
+      void        setLayoutPage(entry_index_type s)       { layoutPage = s; }
+      // A checksum is only reported when the MIME list starts at offset 80 or later.
+      bool        hasChecksum() const              { return getMimeListPos() >= 80; }
+      offset_type getChecksumPos() const           { return hasChecksum() ? checksumPos : 0; }
+      void        setChecksumPos(offset_type p)    { checksumPos = p; }
+  };
+
+}
+
+#endif // ZIM_FILEHEADER_H
diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp
new file mode 100644
index 0000000..1593ba6
--- /dev/null
+++ b/src/fileimpl.cpp
@@ -0,0 +1,676 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2006,2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "fileimpl.h"
+#include <zim/error.h>
+#include "_dirent.h"
+#include "file_compound.h"
+#include "buffer_reader.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sstream>
+#include <errno.h>
+#include <cstring>
+#include <fstream>
+#include "config.h"
+#include "log.h"
+#include "envvalue.h"
+#include "md5.h"
+#include "tools.h"
+
+log_define("zim.file.impl")
+
+namespace zim
+{
+
+namespace
+{
+
+offset_t readOffset(const Reader& reader, entry_index_type idx)
+{ // Read the idx-th offset_type-sized entry of the table exposed by `reader`.
+  offset_t offset(reader.read_uint<offset_type>(offset_t(sizeof(offset_type)*idx)));
+  return offset;
+}
+
+std::unique_ptr<const Reader>
+sectionSubReader(const Reader& zimReader, const std::string& sectionName,
+                 offset_t offset, zsize_t size)
+{ // Reader for the `size` bytes at `offset`; throws ZimFileFormatError naming the section if they cannot be read.
+  if (!zimReader.can_read(offset, size)) {
+    throw ZimFileFormatError(sectionName + " outside (or not fully inside) ZIM file.");
+  }
+#ifdef ENABLE_USE_BUFFER_HEADER
+  const auto buf = zimReader.get_buffer(offset, size);  // serve the section from a buffer
+  return std::unique_ptr<Reader>(new BufferReader(buf));
+#else
+  return zimReader.sub_reader(offset, size);
+#endif
+}
+
+std::shared_ptr<Reader>
+makeFileReader(std::shared_ptr<const FileCompound> zimFile, offset_t offset, zsize_t size)
+{ // Pick the reader implementation for zimFile; returns nullptr when zimFile->fail().
+  if (zimFile->fail()) {
+    return nullptr;
+  } else if ( zimFile->is_multiPart() ) {
+    ASSERT(offset.v, ==, 0u);  // a multi-part archive is only opened as a whole
+    ASSERT(size, ==, zimFile->fsize());
+    return std::make_shared<MultiPartFileReader>(zimFile);
+  } else {
+    const auto& firstAndOnlyPart = zimFile->begin()->second;
+    return std::make_shared<FileReader>(firstAndOnlyPart->shareable_fhandle(), offset, size);
+  }
+}
+
+} //unnamed namespace
+
+  //////////////////////////////////////////////////////////////////////
+  // FileImpl
+  //
+  FileImpl::FileImpl(const std::string& fname)
+    : FileImpl(std::make_shared<FileCompound>(fname))
+  {}  // delegates to the FileCompound overload
+
+#ifndef _WIN32
+  FileImpl::FileImpl(int fd)
+    : FileImpl(std::make_shared<FileCompound>(fd))
+  {}  // descriptor-based overloads are compiled only when _WIN32 is not defined
+
+  FileImpl::FileImpl(int fd, offset_t offset, zsize_t size)
+    : FileImpl(std::make_shared<FileCompound>(fd), offset, size)
+  {}  // forwards the caller-supplied offset and size
+#endif
+
+  FileImpl::FileImpl(std::shared_ptr<FileCompound> _zimFile)
+    : FileImpl(_zimFile, offset_t(0), _zimFile->fsize())
+  {}  // whole file: offset 0 and size fsize()
+
+  FileImpl::FileImpl(std::shared_ptr<FileCompound> _zimFile, offset_t offset, zsize_t size)
+    : zimFile(_zimFile),
+      archiveStartOffset(offset),
+      zimReader(makeFileReader(zimFile, offset, size)),
+      direntReader(new DirentReader(zimReader)),
+      clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE)),
+      m_newNamespaceScheme(false),
+      m_hasFrontArticlesIndex(true),
+      m_startUserEntry(0),
+      m_endUserEntry(0)
+  {  // Open the archive: read the header, set up the pointer tables, then the MIME types.
+    log_trace("read file \"" << zimFile->filename() << '"');
+    // makeFileReader() yields nullptr in this case, so bail out before zimReader is used.
+    if (zimFile->fail())
+      throw ZimFileFormatError(std::string("can't open zim-file \"") + zimFile->filename() + '"');
+
+    // read header
+    if (size_type(zimReader->size()) < Fileheader::size) {
+      throw ZimFileFormatError("zim-file is too small to contain a header");
+    }
+    try {
+      header.read(*zimReader);
+    } catch (ZimFileFormatError&) {
+      throw;  // rethrow the original exception object: no copy, no slicing
+    } catch (...) {
+      throw ZimFileFormatError("error reading zim-file header.");
+    }
+    // Dirent pointer table: one offset_type per entry, starting at urlPtrPos.
+    auto urlPtrReader = sectionSubReader(*zimReader,
+                                         "Dirent pointer table",
+                                         offset_t(header.getUrlPtrPos()),
+                                         zsize_t(sizeof(offset_type)*header.getArticleCount()));
+
+    mp_urlDirentAccessor.reset(
+        new DirectDirentAccessor(direntReader, std::move(urlPtrReader), entry_index_t(header.getArticleCount())));
+
+    // Cluster pointer table: one offset_type per cluster, starting at clusterPtrPos.
+    clusterOffsetReader = sectionSubReader(*zimReader,
+                                           "Cluster pointer table",
+                                           offset_t(header.getClusterPtrPos()),
+                                           zsize_t(sizeof(offset_type)*header.getClusterCount()));
+
+    quickCheckForCorruptFile();
+    // Prefer the listing stored in entry X/listing/titleOrdered/v1; else use the header's title index.
+    mp_titleDirentAccessor = getTitleAccessor("listing/titleOrdered/v1");
+
+    if (!mp_titleDirentAccessor) {
+      offset_t titleOffset(header.getTitleIdxPos());
+      zsize_t  titleSize(sizeof(entry_index_type)*header.getArticleCount());
+      mp_titleDirentAccessor = getTitleAccessor(titleOffset, titleSize, "Title index table");
+      const_cast<bool&>(m_hasFrontArticlesIndex) = false;
+    }
+    m_byTitleDirentLookup.reset(new ByTitleDirentLookup(mp_titleDirentAccessor.get()));
+
+    readMimeTypes();
+  }
+
+  std::unique_ptr<IndirectDirentAccessor> FileImpl::getTitleAccessor(const std::string& path)
+  { // Build a title accessor from the blob of entry 'X'/<path>; nullptr if not found or compressed.
+    auto result = direntLookup().find('X', path);
+    if (!result.first) {
+      return nullptr;
+    }
+    // The blob is addressed directly by file offset below, hence the uncompressed requirement.
+    auto dirent = mp_urlDirentAccessor->getDirent(result.second);
+    auto cluster = getCluster(dirent->getClusterNumber());
+    if (cluster->isCompressed()) {
+      // This is a ZimFileFormatError.
+      // Let's be tolerant and skip the entry
+      return nullptr;
+    }
+    auto titleOffset = getClusterOffset(dirent->getClusterNumber()) + cluster->getBlobOffset(dirent->getBlobNumber());
+    auto titleSize = cluster->getBlobSize(dirent->getBlobNumber());
+    return getTitleAccessor(titleOffset, titleSize, "Title index table" + path);  // NOTE(review): no separator before path in the section name
+  }
+
+  std::unique_ptr<IndirectDirentAccessor> FileImpl::getTitleAccessor(const offset_t offset, const zsize_t size, const std::string& name)
+  { // Build a title accessor over the `size` bytes at `offset`; `name` is used in the error message.
+      auto titleIndexReader = sectionSubReader(*zimReader,
+                                               name,
+                                               offset,
+                                               size);
+      // The table holds size / sizeof(entry_index_type) indexes.
+      return std::unique_ptr<IndirectDirentAccessor>(
+        new IndirectDirentAccessor(mp_urlDirentAccessor, std::move(titleIndexReader), title_index_t(size.v/sizeof(entry_index_type))));
+  }
+
+  FileImpl::DirentLookup& FileImpl::direntLookup() const
+  { // Return the dirent lookup, creating it on first use.
+    // Not using std::call_once because it is buggy.
+    // 1. It doesn't play well with musl libc - an exception thrown by the
+    //    callable results in SIGABRT even if there is a handler for it higher
+    //    in the call stack.
+    // 2. With `glibc` an exceptional execution of `std::call_once` doesn't
+    //    unlock the mutex associated with the `std::once_flag` object.
+    if ( !m_direntLookup ) {  // NOTE(review): this first test runs without holding the mutex
+      std::lock_guard<std::mutex> lock(m_direntLookupCreationMutex);
+      if ( !m_direntLookup ) {  // re-tested under the lock before creating
+        const auto cacheSize = envValue("ZIM_DIRENTLOOKUPCACHE", DIRENT_LOOKUP_CACHE_SIZE);
+        m_direntLookup.reset(new DirentLookup(mp_urlDirentAccessor.get(), cacheSize));
+      }
+    }
+    return *m_direntLookup;
+  }
+
+  void FileImpl::quickCheckForCorruptFile()
+  { // Cheap consistency checks; throws ZimFileFormatError when one fails.
+    if (!getCountClusters())
+      log_warn("no clusters found");
+    else
+    {
+      offset_t lastOffset = getClusterOffset(cluster_index_t(cluster_index_type(getCountClusters()) - 1));  // last entry of the cluster pointer table
+      log_debug("last offset=" << lastOffset.v << " file size=" << getFilesize().v);
+      if (lastOffset.v > getFilesize().v)
+      {
+        log_fatal("last offset (" << lastOffset << ") larger than file size (" << getFilesize() << ')');
+        throw ZimFileFormatError("last cluster offset larger than file size; file corrupt");
+      }
+    }
+    // When present, the checksum must occupy exactly the last 16 bytes.
+    if (header.hasChecksum() && header.getChecksumPos() != (getFilesize().v-16) ) {
+      throw ZimFileFormatError("Checksum position is not valid");
+    }
+  }
+
+  offset_type FileImpl::getMimeListEndUpperLimit() const
+  { // Smallest known section start; the MIME-type list cannot extend past it.
+    offset_type result(header.getUrlPtrPos());
+    result = std::min(result, header.getTitleIdxPos());
+    result = std::min(result, header.getClusterPtrPos());
+    if ( getCountArticles().v != 0 ) {
+      // assuming that dirents are placed in the zim file in the same
+      // order as the corresponding entries in the dirent pointer table
+      result = std::min(result, mp_urlDirentAccessor->getOffset(entry_index_t(0)).v);
+
+      // assuming that clusters are placed in the zim file in the same
+      // order as the corresponding entries in the cluster pointer table
+      result = std::min(result, readOffset(*clusterOffsetReader, 0).v);
+    }
+    return result;
+  }
+
+  void FileImpl::readMimeTypes()
+  {
+    // Parse the MIME-type list (zero-terminated strings, ended by an empty
+    // string), then derive the namespace scheme and the user-entry range.
+    // libzim writes zim files two ways:
+    // - The old way, by putting the urlPtrPos just after the MIME-type list.
+    // - The new way, by putting the urlPtrPos at the end of the zim file. In this
+    //   case the cluster data is always at offset 1024 and the list is before it.
+    // 1024 seems to be a good maximum size for the list, even for the "old" way.
+    const auto endMimeList = getMimeListEndUpperLimit();
+    if ( endMimeList <= header.getMimeListPos() ) {
+        throw(ZimFileFormatError("Bad ZIM archive"));
+    }
+    const zsize_t size(endMimeList - header.getMimeListPos());
+    if ( endMimeList > 1024 ) {
+        log_warn("The MIME-type list is abnormally large (" << size.v << " bytes)");
+    }
+    auto buffer = zimReader->get_buffer(offset_t(header.getMimeListPos()), size);
+    const char* const bufferEnd = buffer.data() + size.v;
+    const char* p = buffer.data();
+    for (;;) {
+      // Bounded search: p is never dereferenced at or past bufferEnd.
+      const char* zp = std::find(p, bufferEnd, '\0');
+      if (zp == bufferEnd) {
+        throw(ZimFileFormatError("Error getting mimelists."));
+      }
+      if (zp == p) {
+        break;  // empty string: end of the list
+      }
+      mimeTypes.push_back(std::string(p, zp));
+      p = zp+1;
+    }
+    // Minor version >= 1 selects the new scheme, where user entries are the 'C' namespace.
+    const_cast<bool&>(m_newNamespaceScheme) = header.getMinorVersion() >= 1;
+    if (m_newNamespaceScheme) {
+      const_cast<entry_index_t&>(m_startUserEntry) = getNamespaceBeginOffset('C');
+      const_cast<entry_index_t&>(m_endUserEntry) = getNamespaceEndOffset('C');
+    } else {
+      const_cast<entry_index_t&>(m_endUserEntry) = getCountArticles();
+    }
+  }
+
+  FileImpl::FindxResult FileImpl::findx(char ns, const std::string& url)
+  { // Forward the (namespace, url) lookup to the dirent lookup.
+    return direntLookup().find(ns, url);
+  }
+
+  FileImpl::FindxResult FileImpl::findx(const std::string& url)
+  { // Split `url` with parseLongPath() and look it up; any exception yields "not found".
+    char ns;
+    std::string path;
+    try {
+      std::tie(ns, path) = parseLongPath(url);
+      return findx(ns, path);
+    } catch (...) {}  // deliberately swallowed: falls through to the "not found" result
+    return { false, entry_index_t(0) };
+  }
+
+  static inline int direntCompareTitle(char ns, const std::string& title, const Dirent& dirent)
+  { // Three-way compare of (ns, title) against the dirent: namespace first, then title.
+    auto direntNs = dirent.getNamespace();
+    if (ns < direntNs) {
+      return -1;
+    }
+    if (ns > direntNs) {
+      return 1;
+    }
+    return title.compare(dirent.getTitle());  // same namespace: result of std::string::compare
+  }
+
+  FileImpl::FindxTitleResult FileImpl::findxByTitle(char ns, const std::string& title)
+  { // Forward the (namespace, title) lookup to the by-title lookup.
+    return m_byTitleDirentLookup->find(ns, title);
+  }
+
+  FileCompound::PartRange
+  FileImpl::getFileParts(offset_t offset, zsize_t size)
+  { // Forward to FileCompound::locate() for the given offset and size.
+    return zimFile->locate(offset, size);
+  }
+
+  std::shared_ptr<const Dirent> FileImpl::getDirent(entry_index_t idx)
+  { // Fetch the dirent at `idx` through the dirent-pointer-table accessor.
+    return mp_urlDirentAccessor->getDirent(idx);
+  }
+
+  std::shared_ptr<const Dirent> FileImpl::getDirentByTitle(title_index_t idx)
+  { // Fetch the dirent at title-order position `idx` through the title accessor.
+    return mp_titleDirentAccessor->getDirent(idx);
+  }
+
+  entry_index_t FileImpl::getIndexByTitle(title_index_t idx) const
+  { // Translate a title-order position into an entry index via the title accessor.
+    return mp_titleDirentAccessor->getDirectIndex(idx);
+  }
+
+  entry_index_t FileImpl::getFrontEntryCount() const
+  { // Number of entries exposed by the title accessor chosen in the constructor.
+    return entry_index_t(mp_titleDirentAccessor->getDirentCount().v);
+  }
+
+  void FileImpl::prepareArticleListByCluster() const
+  { // Fill m_articleListByCluster with (cluster number, entry index) pairs, sorted.
+    m_articleListByCluster.reserve(getUserEntryCount().v);
+
+    auto endIdx = getEndUserEntry().v;
+    for(auto i = getStartUserEntry().v; i < endIdx; i++)
+    {
+      // This is the offset of the dirent in the zimFile
+      auto indexOffset = mp_urlDirentAccessor->getOffset(entry_index_t(i));
+      // Get the mimeType of the dirent (offset 0) to know the type of the dirent
+      uint16_t mimeType = zimReader->read_uint<uint16_t>(indexOffset);
+      if (mimeType==Dirent::redirectMimeType || mimeType==Dirent::linktargetMimeType || mimeType == Dirent::deletedMimeType) {
+        m_articleListByCluster.push_back(std::make_pair(0, i));  // these dirent kinds are filed under cluster 0
+      } else {
+        // If it is a classic article, get the clusterNumber (at offset 8)
+        auto clusterNumber = zimReader->read_uint<zim::cluster_index_type>(indexOffset+offset_t(8));
+        m_articleListByCluster.push_back(std::make_pair(clusterNumber, i));
+      }
+    }
+    std::sort(m_articleListByCluster.begin(), m_articleListByCluster.end());  // by cluster number, then entry index
+  }
+
+  entry_index_t FileImpl::getIndexByClusterOrder(entry_index_t idx) const
+  { // Entry index at position `idx` of the cluster-ordered list, built on first use.
+    // Not using std::call_once because it is buggy. See the comment
+    // in FileImpl::direntLookup().
+    if ( m_articleListByCluster.empty() ) {  // NOTE(review): this first test runs without holding the mutex
+      std::lock_guard<std::mutex> lock(m_articleListByClusterMutex);
+      if ( m_articleListByCluster.empty() ) {  // re-tested under the lock before building
+        prepareArticleListByCluster();
+      }
+    }
+    if (idx.v >= m_articleListByCluster.size())
+      throw std::out_of_range("entry index out of range");
+    return entry_index_t(m_articleListByCluster[idx.v].second);
+  }
+
+  FileImpl::ClusterHandle FileImpl::readCluster(cluster_index_t idx)
+  { // Read cluster `idx` from the offset given by the cluster pointer table.
+    offset_t clusterOffset(getClusterOffset(idx));
+    log_debug("read cluster " << idx << " from offset " << clusterOffset);
+    return Cluster::read(*zimReader, clusterOffset);
+  }
+
+  std::shared_ptr<const Cluster> FileImpl::getCluster(cluster_index_t idx)
+  { // Cluster `idx` via the cluster cache; throws ZimFileFormatError if idx is out of range.
+    if (idx >= getCountClusters())
+      throw ZimFileFormatError("cluster index out of range");
+
+    auto cluster = clusterCache.getOrPut(idx.v, [=](){ return readCluster(idx); });
+#if ENV32BIT
+    // There was a bug in the way we create the zim files using ZSTD compression.
+    // We were using a too high compression level and so a window of 128Mb.
+    // So at decompression, zstd reserves a 128Mb buffer.
+    // While this memory is not really used (thanks to lazy allocation of OS),
+    // we are still consuming address space. On 32 bits this starts to be a rare
+    // resource when we reserve 128Mb at once.
+    // So we drop the cluster from the cache to avoid future memory allocation error.
+    if (cluster->getCompression() == Cluster::Compression::Zstd) {
+      // ZSTD compression starts to be used on version 5.0 of zim format.
+      // Shortly after, we switched to 5.1 and integrated the fix in zstd creation.
+      // 5.0 is not a perfect way to detect faulty zim file (it will generate false
+      // positives) but it should be enough.
+      if (header.getMajorVersion() == 5 && header.getMinorVersion() == 0) {
+        clusterCache.drop(idx.v);
+      }
+    }
+#endif
+    return cluster;
+  }
+
+  offset_t FileImpl::getClusterOffset(cluster_index_t idx) const
+  { // Entry `idx` of the cluster pointer table.
+    return readOffset(*clusterOffsetReader, idx.v);
+  }
+
+  offset_t FileImpl::getBlobOffset(cluster_index_t clusterIdx, blob_index_t blobIdx)
+  { // Cluster offset plus the blob's offset inside the cluster; 0 for a compressed cluster.
+    auto cluster = getCluster(clusterIdx);
+    if (cluster->isCompressed())
+      return offset_t(0);
+    return getClusterOffset(clusterIdx) + cluster->getBlobOffset(blobIdx);
+  }
+
+  entry_index_t FileImpl::getNamespaceBeginOffset(char ch) const
+  { // Start of namespace `ch`'s entry range, as reported by the dirent lookup.
+    log_trace("getNamespaceBeginOffset(" << ch << ')');
+    return direntLookup().getNamespaceRangeBegin(ch);
+  }
+
+  entry_index_t FileImpl::getNamespaceEndOffset(char ch) const
+  { // End of namespace `ch`'s entry range, as reported by the dirent lookup.
+    log_trace("getNamespaceEndOffset(" << ch << ')');
+    return direntLookup().getNamespaceRangeEnd(ch);
+  }
+
+  const std::string& FileImpl::getMimeType(uint16_t idx) const
+  { // MIME-type string for code `idx`; throws ZimFileFormatError for an unknown code.
+    if (idx >= mimeTypes.size())
+    {
+      std::ostringstream msg;
+      msg << "unknown mime type code " << idx;
+      throw ZimFileFormatError(msg.str());
+    }
+
+    return mimeTypes[idx];
+  }
+
+  std::string FileImpl::getChecksum()
+  { // Lowercase hex form of the 16 checksum bytes; empty string if absent or unreadable.
+    if (!header.hasChecksum())
+      return std::string();
+
+    try {
+      auto chksum = zimReader->get_buffer(offset_t(header.getChecksumPos()), zsize_t(16));
+
+      char hexdigest[33];  // 32 hex digits plus the terminating NUL
+      hexdigest[32] = '\0';
+      static const char hex[] = "0123456789abcdef";
+      char* p = hexdigest;
+      for (int i = 0; i < 16; ++i)
+      {
+        uint8_t v = chksum.at(offset_t(i));
+        *p++ = hex[v >> 4];  // high nibble first
+        *p++ = hex[v & 0xf];
+      }
+      log_debug("chksum=" << hexdigest);
+      return hexdigest;
+    } catch (...)
+    {
+      log_warn("error reading checksum");
+      return std::string();
+    }
+  }
+
+  bool FileImpl::verify()
+  { // Recompute the MD5 of the bytes before checksumPos and compare it with the stored digest.
+    if (!header.hasChecksum())
+      return false;
+
+    struct zim_MD5_CTX md5ctx;
+    zim_MD5Init(&md5ctx);
+    // NOTE(review): each part is read from its byte 0; archiveStartOffset is not applied here - confirm.
+    offset_type checksumPos = header.getChecksumPos();
+    offset_type currentPos = 0;
+    for(auto part = zimFile->begin();
+        part != zimFile->end();
+        part++) {
+      std::ifstream stream(part->second->filename(), std::ios_base::in|std::ios_base::binary);
+      // Hash one byte at a time until checksumPos is reached or the part ends.
+      char ch;
+      for(/*NOTHING*/ ; currentPos < checksumPos && stream.get(ch).good(); currentPos++) {
+        zim_MD5Update(&md5ctx, reinterpret_cast<const uint8_t*>(&ch), 1);
+      }
+      if (stream.bad()) {
+        perror("error while reading file");
+        return false;
+      }
+      if (currentPos == checksumPos) {
+        break;
+      }
+    }
+    // Fewer than checksumPos bytes could be read from the parts.
+    if (currentPos != checksumPos) {
+      return false;
+    }
+
+    unsigned char chksumCalc[16];
+    auto chksumFile = zimReader->get_buffer(offset_t(header.getChecksumPos()), zsize_t(16));
+
+    zim_MD5Final(chksumCalc, &md5ctx);
+    if (std::memcmp(chksumFile.data(), chksumCalc, 16) != 0)
+    {
+      return false;
+    }
+
+    return true;
+  }
+
+  time_t FileImpl::getMTime() const {  // forwards to FileCompound::getMTime()
+    return zimFile->getMTime();
+  }
+
+  zim::zsize_t FileImpl::getFilesize() const {  // size reported by zimReader, not by the FileCompound
+    return zimReader->size();
+  }
+
+  bool FileImpl::is_multiPart() const {  // forwards to FileCompound::is_multiPart()
+    return zimFile->is_multiPart();
+  }
+
+  bool FileImpl::checkIntegrity(IntegrityCheck checkType) {  // Run the single check selected by checkType.
+    switch(checkType) {
+      case IntegrityCheck::CHECKSUM: return FileImpl::checkChecksum();
+      case IntegrityCheck::DIRENT_PTRS: return FileImpl::checkDirentPtrs();
+      case IntegrityCheck::DIRENT_ORDER: return FileImpl::checkDirentOrder();
+      case IntegrityCheck::TITLE_INDEX: return FileImpl::checkTitleIndex();
+      case IntegrityCheck::CLUSTER_PTRS: return FileImpl::checkClusterPtrs();
+      case IntegrityCheck::DIRENT_MIMETYPES: return FileImpl::checkDirentMimeTypes();
+      case IntegrityCheck::COUNT: ASSERT("shouldn't have reached here", ==, "");
+    }
+    return false;  // reached only when no case above returned
+  }
+
+  bool FileImpl::checkChecksum() {  // verify() plus a diagnostic on std::cerr when it fails
+    if ( ! verify() ) {
+        std::cerr << "Checksum doesn't match" << std::endl;
+        return false;
+    }
+    return true;
+  }
+
+  bool FileImpl::checkDirentPtrs() {  // true if every dirent pointer lies in the valid range
+    const entry_index_type articleCount = getCountArticles().v;
+    const offset_t validDirentRangeStart(80); // XXX: really???
+    const offset_t validDirentRangeEnd = header.hasChecksum()
+                                       ? offset_t(header.getChecksumPos())
+                                       : offset_t(zimReader->size().v);
+    const zsize_t direntMinSize(11);  // a dirent must fit between its offset and the range end
+    for ( entry_index_type i = 0; i < articleCount; ++i )
+    {
+      const auto offset = mp_urlDirentAccessor->getOffset(entry_index_t(i));
+      if ( offset < validDirentRangeStart ||
+           offset + direntMinSize > validDirentRangeEnd ) {
+        std::cerr << "Invalid dirent pointer" << std::endl;
+        return false;
+      }
+    }
+    return true;
+  }
+
+  bool FileImpl::checkDirentOrder() {  // true if dirents are strictly increasing by getLongUrl()
+    const entry_index_type articleCount = getCountArticles().v;
+    std::shared_ptr<const Dirent> prevDirent;
+    for ( entry_index_type i = 0; i < articleCount; ++i )
+    {
+      const std::shared_ptr<const Dirent> dirent = mp_urlDirentAccessor->getDirent(entry_index_t(i));
+      if ( prevDirent && !(prevDirent->getLongUrl() < dirent->getLongUrl()) )  // equal URLs fail as well
+      {
+        std::cerr << "Dirent table is not properly sorted:\n"
+                  << "  #" << i-1 << ": " << prevDirent->getLongUrl() << "\n"
+                  << "  #" << i   << ": " << dirent->getLongUrl() << std::endl;
+        return false;
+      }
+      prevDirent = dirent;
+    }
+    return true;
+  }
+
+  bool FileImpl::checkClusterPtrs() {  // true if every cluster pointer lies in the valid range
+    const cluster_index_type clusterCount = getCountClusters().v;
+    const offset_t validClusterRangeStart(80); // XXX: really???
+    const offset_t validClusterRangeEnd = header.hasChecksum()
+                                       ? offset_t(header.getChecksumPos())
+                                       : offset_t(zimReader->size().v);
+    const zsize_t clusterMinSize(1); // XXX
+    for ( cluster_index_type i = 0; i < clusterCount; ++i )
+    {
+      const auto offset = readOffset(*clusterOffsetReader, i);
+      if ( offset < validClusterRangeStart ||
+           offset + clusterMinSize > validClusterRangeEnd ) {
+        std::cerr << "Invalid cluster pointer" << std::endl;
+        return false;
+      }
+    }
+    return true;
+  }
+
+namespace
+{
+
+std::string pseudoTitle(const Dirent& d)
+{ // Sort key used by checkTitleListing(): "<namespace>/<title>".
+  return std::string(1, d.getNamespace()) + '/' + d.getTitle();
+}
+
+bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type totalCount) {  // true if all indexes are < totalCount and the order is non-decreasing
+  const entry_index_type direntCount = accessor.getDirentCount().v;
+  std::shared_ptr<const Dirent> prevDirent;
+  for ( entry_index_type i = 0; i < direntCount; ++i ) {
+    if (accessor.getDirectIndex(title_index_t(i)).v >= totalCount) {
+      std::cerr << "Invalid title index entry." << std::endl;
+      return false;
+    }
+    // Equal pseudo-titles are allowed here (<=), unlike in checkDirentOrder().
+    const std::shared_ptr<const Dirent> dirent = accessor.getDirent(title_index_t(i));
+    if ( prevDirent && !(pseudoTitle(*prevDirent) <= pseudoTitle(*dirent)) ) {
+      std::cerr << "Title index is not properly sorted." << std::endl;
+      return false;
+    }
+    prevDirent = dirent;
+  }
+  return true;
+}
+
+} // unnamed namespace
+
+  bool FileImpl::checkTitleIndex() {  // checks the header's title index and, if present, the v1 listing
+    const entry_index_type articleCount = getCountArticles().v;
+    // The full title index located by the header is always checked.
+    offset_t titleOffset(header.getTitleIdxPos());
+    zsize_t  titleSize(sizeof(entry_index_type)*header.getArticleCount());
+    auto titleDirentAccessor = getTitleAccessor(titleOffset, titleSize, "Full Title index table");
+    auto ret = checkTitleListing(*titleDirentAccessor, articleCount);
+    // The listing stored as an entry is checked only when getTitleAccessor() finds it.
+    titleDirentAccessor = getTitleAccessor("listing/titleOrdered/v1");
+    if (titleDirentAccessor) {
+      ret &= checkTitleListing(*titleDirentAccessor, articleCount);
+    }
+    return ret;
+  }
+
+  bool FileImpl::checkDirentMimeTypes() {  // true if every article dirent's MIME code indexes mimeTypes
+    const entry_index_type articleCount = getCountArticles().v;
+    for ( entry_index_type i = 0; i < articleCount; ++i )
+    {
+      const auto dirent = mp_urlDirentAccessor->getDirent(entry_index_t(i));
+      if ( dirent->isArticle() && dirent->getMimeType() >= mimeTypes.size() ) {  // only dirents with isArticle() are checked
+        std::cerr << "Entry " << dirent->getLongUrl()
+                  << " has invalid MIME-type value " << dirent->getMimeType()
+                  << "." << std::endl;
+        return false;
+      }
+    }
+    return true;
+  }
+
+}
diff --git a/src/fileimpl.h b/src/fileimpl.h
new file mode 100644
index 0000000..4e1f7d7
--- /dev/null
+++ b/src/fileimpl.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020-2021 Veloman Yunkan
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FILEIMPL_H
+#define ZIM_FILEIMPL_H
+
+#include <string>
+#include <vector>
+#include <map>
+#include <memory>
+#include <zim/zim.h>
+#include <mutex>
+#include "lrucache.h"
+#include "concurrent_cache.h"
+#include "_dirent.h"
+#include "dirent_accessor.h"
+#include "dirent_lookup.h"
+#include "cluster.h"
+#include "buffer.h"
+#include "file_reader.h"
+#include "file_compound.h"
+#include "fileheader.h"
+#include "zim_types.h"
+#include "direntreader.h"
+
+
+namespace zim
+{
+  // Internal representation of an opened ZIM archive.
+  //
+  // Groups the underlying file, its header, the dirent accessors (by url and
+  // by title), the cluster cache and the lookup helpers, and exposes the
+  // queries declared in the public section below.
+  class FileImpl
+  {
+      std::shared_ptr<FileCompound> zimFile;
+      // Returned as-is by getArchiveStartOffset().
+      offset_t archiveStartOffset;
+      std::shared_ptr<Reader> zimReader;
+      std::shared_ptr<DirentReader> direntReader;
+      Fileheader header;
+
+      std::unique_ptr<const Reader> clusterOffsetReader;
+
+      // Dirent access in url order (direct) and in title order (indirect).
+      std::shared_ptr<const DirectDirentAccessor> mp_urlDirentAccessor;
+      std::unique_ptr<const IndirectDirentAccessor> mp_titleDirentAccessor;
+
+      // Cache of cluster handles keyed by cluster index.
+      typedef std::shared_ptr<const Cluster> ClusterHandle;
+      ConcurrentCache<cluster_index_type, ClusterHandle> clusterCache;
+
+      // Fixed at construction time (all const members).
+      const bool m_newNamespaceScheme;
+      const bool m_hasFrontArticlesIndex;
+      const entry_index_t m_startUserEntry;
+      const entry_index_t m_endUserEntry;
+
+      // MIME-type strings; see getMimeType(uint16_t) and readMimeTypes().
+      typedef std::vector<std::string> MimeTypes;
+      MimeTypes mimeTypes;
+
+      // (cluster index, entry index) pairs. Mutable and paired with a mutex,
+      // so it can be filled from const methods
+      // (presumably by prepareArticleListByCluster() - verify in fileimpl.cpp).
+      using pair_type = std::pair<cluster_index_type, entry_index_type>;
+      mutable std::vector<pair_type> m_articleListByCluster;
+      mutable std::mutex m_articleListByClusterMutex;
+
+      // Lookup configuration: entries are searched through a
+      // DirectDirentAccessor, using Dirent::getUrl() as the key.
+      struct DirentLookupConfig
+      {
+        typedef DirectDirentAccessor DirentAccessorType;
+        typedef entry_index_t index_t;
+        static const std::string& getDirentKey(const Dirent& d) {
+          return d.getUrl();
+        }
+      };
+
+      // Url lookup helper; mutable with a dedicated creation mutex
+      // (presumably created on first use by direntLookup() - verify).
+      using DirentLookup = zim::FastDirentLookup<DirentLookupConfig>;
+      mutable std::unique_ptr<DirentLookup> m_direntLookup;
+      mutable std::mutex m_direntLookupCreationMutex;
+
+
+      // Lookup configuration: entries are searched through an
+      // IndirectDirentAccessor, using Dirent::getTitle() as the key.
+      struct ByTitleDirentLookupConfig
+      {
+        typedef IndirectDirentAccessor DirentAccessorType;
+        typedef title_index_t index_t;
+        static const std::string& getDirentKey(const Dirent& d) {
+          return d.getTitle();
+        }
+      };
+
+      using ByTitleDirentLookup = zim::DirentLookup<ByTitleDirentLookupConfig>;
+      std::unique_ptr<ByTitleDirentLookup> m_byTitleDirentLookup;
+
+    public:
+      // Result of the findx*() searches: a flag plus an index
+      // (presumably "exact match found" and the match/insertion position -
+      // verify against the lookup implementation).
+      using FindxResult = std::pair<bool, entry_index_t>;
+      using FindxTitleResult = std::pair<bool, title_index_t>;
+
+      // Opens the archive stored in the file named `fname`.
+      explicit FileImpl(const std::string& fname);
+#ifndef _WIN32
+      // File-descriptor based constructors; not available on Windows.
+      explicit FileImpl(int fd);
+      FileImpl(int fd, offset_t offset, zsize_t size);
+#endif
+
+      offset_t getArchiveStartOffset() const { return archiveStartOffset; }
+      time_t getMTime() const;
+
+      const std::string& getFilename() const   { return zimFile->filename(); }
+      const Fileheader& getFileheader() const  { return header; }
+      zsize_t getFilesize() const;
+      bool hasNewNamespaceScheme() const { return m_newNamespaceScheme; }
+      bool hasFrontArticlesIndex() const { return m_hasFrontArticlesIndex; }
+
+      FileCompound::PartRange getFileParts(offset_t offset, zsize_t size);
+      std::shared_ptr<const Dirent> getDirent(entry_index_t idx);
+      std::shared_ptr<const Dirent> getDirentByTitle(title_index_t idx);
+      entry_index_t getIndexByTitle(title_index_t idx) const;
+      entry_index_t getIndexByClusterOrder(entry_index_t idx) const;
+      // Entry count as recorded in the file header.
+      entry_index_t getCountArticles() const { return entry_index_t(header.getArticleCount()); }
+
+      FindxResult findx(char ns, const std::string& url);
+      FindxResult findx(const std::string& url);
+      FindxTitleResult findxByTitle(char ns, const std::string& title);
+
+      std::shared_ptr<const Cluster> getCluster(cluster_index_t idx);
+      // Cluster count as recorded in the file header.
+      cluster_index_t getCountClusters() const       { return cluster_index_t(header.getClusterCount()); }
+      offset_t getClusterOffset(cluster_index_t idx) const;
+      offset_t getBlobOffset(cluster_index_t clusterIdx, blob_index_t blobIdx);
+
+      entry_index_t getNamespaceBeginOffset(char ch) const;
+      entry_index_t getNamespaceEndOffset(char ch) const;
+      // Size of the [begin, end) range reported for namespace `ch`.
+      entry_index_t getNamespaceEntryCount(char ch) const {
+        return getNamespaceEndOffset(ch) - getNamespaceBeginOffset(ch);
+      }
+
+      entry_index_t getStartUserEntry() const { return m_startUserEntry; }
+      entry_index_t getEndUserEntry() const { return m_endUserEntry; }
+      // The number of entries added by the creator. (So excluding index, ...).
+      // On new namespace scheme, number of entries in C namespace
+      entry_index_t getUserEntryCount() const { return m_endUserEntry - m_startUserEntry; }
+      // The number of entries that can be considered as front article (no resource)
+      entry_index_t getFrontEntryCount() const;
+
+      const std::string& getMimeType(uint16_t idx) const;
+
+      std::string getChecksum();
+      bool verify();
+      bool is_multiPart() const;
+
+      // Runs the integrity check selected by `checkType`
+      // (presumably dispatching to the private check*() methods - verify).
+      bool checkIntegrity(IntegrityCheck checkType);
+  private:
+      // Constructors working on an already opened FileCompound.
+      explicit FileImpl(std::shared_ptr<FileCompound> zimFile);
+      FileImpl(std::shared_ptr<FileCompound> zimFile, offset_t offset, zsize_t size);
+
+      // Build a title-ordered accessor, either from an entry path inside the
+      // archive or from an explicit (offset, size) region; `name` is a
+      // human-readable label for that region. The path-based overload may
+      // return a null pointer (callers test the result before using it).
+      std::unique_ptr<IndirectDirentAccessor> getTitleAccessor(const std::string& path);
+      std::unique_ptr<IndirectDirentAccessor> getTitleAccessor(const offset_t offset, const zsize_t size, const std::string& name);
+
+      void prepareArticleListByCluster() const;
+      DirentLookup& direntLookup() const;
+      ClusterHandle readCluster(cluster_index_t idx);
+      offset_type getMimeListEndUpperLimit() const;
+      void readMimeTypes();
+      void quickCheckForCorruptFile();
+
+      // Individual integrity checks; each returns true when the check passes.
+      bool checkChecksum();
+      bool checkDirentPtrs();
+      bool checkDirentOrder();
+      bool checkTitleIndex();
+      bool checkClusterPtrs();
+      bool checkDirentMimeTypes();
+  };
+
+}
+
+#endif // ZIM_FILEIMPL_H
+
diff --git a/src/fs.h b/src/fs.h
new file mode 100644
index 0000000..5736a5e
--- /dev/null
+++ b/src/fs.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FS_H_
+#define ZIM_FS_H_
+
+#ifdef _WIN32
+# include "fs_windows.h"
+#else
+# include "fs_unix.h"
+#endif
+
+namespace zim {
+
+// DEFAULTFS names the filesystem backend of the platform being compiled:
+// windows::FS when _WIN32 is defined, unix::FS otherwise. It matches the
+// header selected by the conditional #include above.
+#ifdef _WIN32
+using DEFAULTFS = windows::FS;
+#else
+using DEFAULTFS = unix::FS;
+#endif
+};
+
+#endif //ZIM_FS_H_
diff --git a/src/fs_unix.cpp b/src/fs_unix.cpp
new file mode 100644
index 0000000..e5f404f
--- /dev/null
+++ b/src/fs_unix.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "fs_unix.h"
+#include <stdexcept>
+#include <vector>
+#include <sstream>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+
+namespace zim
+{
+
+namespace unix {
+
+// Reads exactly `size` bytes located at absolute file position `offset`
+// into `dest`, issuing as many positional reads as necessary.
+//
+// The file position of the descriptor is not used (pread semantics).
+//
+// Returns the number of bytes read (i.e. `size`) on success and
+// zsize_t(-1) on failure. Reaching the end of the file before `size` bytes
+// were obtained is a failure, which matches the Windows implementation.
+zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const
+{
+#if defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__)
+# define PREAD pread
+#else
+# define PREAD pread64
+#endif
+  ssize_t full_size_read = 0;
+  auto size_to_read = size.v;
+  auto current_offset = offset.v;
+  errno = 0;
+  while (size_to_read > 0) {
+    // Each chunk must land right after the bytes already stored in dest.
+    auto size_read = PREAD(m_fd, dest + full_size_read, size_to_read, current_offset);
+    if (size_read == -1) {
+      if (errno == EINTR) {
+        // Interrupted before any data was transferred: simply retry.
+        errno = 0;
+        continue;
+      }
+      return zsize_t(-1);
+    }
+    if (size_read == 0) {
+      // End of file: no further progress is possible, do not spin forever.
+      return zsize_t(-1);
+    }
+    size_to_read -= size_read;
+    current_offset += size_read;
+    full_size_read += size_read;
+  }
+  return zsize_t(full_size_read);
+#undef PREAD
+}
+
+// Returns the size in bytes reported by fstat() for the descriptor.
+//
+// When fstat() fails (e.g. invalid descriptor) 0 is returned instead of
+// reading an uninitialised stat buffer; the Windows implementation also
+// reports 0 in that situation.
+zsize_t FD::getSize() const
+{
+  struct stat sb;
+  if (fstat(m_fd, &sb) != 0) {
+    return zsize_t(0);
+  }
+  return zsize_t(sb.st_size);
+}
+
+// Moves the file position of the descriptor to the absolute position
+// `offset`. Returns true when lseek() reports exactly that position.
+bool FD::seek(offset_t offset)
+{
+    const auto reached = lseek(m_fd, offset.v, SEEK_SET);
+    return reached == static_cast<int64_t>(offset.v);
+}
+
+// Closes the descriptor, if any, and marks this object as empty so that a
+// later close() (for instance the one made by the destructor) does not close
+// the same descriptor number a second time.
+//
+// NOTE(review): the return value is kept exactly as it was: the raw
+// ::close() status converted to bool (so a successful close yields false),
+// and true when there was nothing to close. This is the opposite of the
+// Windows implementation; callers should not rely on it.
+bool FD::close() {
+  if (m_fd == -1) {
+    return true;
+  }
+  const int status = ::close(m_fd);
+  // Whatever ::close() reported, the descriptor must not be reused.
+  m_fd = -1;
+  return status;
+}
+
+// Opens `filepath` read-only and returns the owning FD.
+// Throws std::runtime_error (with the strerror() text) when open() fails.
+FD FS::openFile(path_t filepath)
+{
+  const int handle = open(filepath.c_str(), O_RDONLY);
+  if (handle != -1) {
+    return FD(handle);
+  }
+  const std::string reason = strerror(errno);
+  throw std::runtime_error("Error opening file: " + filepath + ": " + reason);
+}
+
+// Creates the directory `path` with mode rwxrwxr-x (before umask).
+// Returns true when mkdir() succeeds.
+bool FS::makeDirectory(path_t path)
+{
+  const mode_t mode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
+  return mkdir(path.c_str(), mode) == 0;
+}
+
+// Renames `old_path` to `new_path`.
+//
+// Throws std::runtime_error when the operation fails, like the Windows
+// implementation does, so that a failed rename cannot go unnoticed.
+void FS::rename(path_t old_path, path_t new_path)
+{
+  if (::rename(old_path.c_str(), new_path.c_str()) != 0) {
+    // Capture errno right away, before any other call can overwrite it.
+    const std::string errorStr = strerror(errno);
+    throw std::runtime_error("Cannot move file " + old_path + " to " + new_path + ": " + errorStr);
+  }
+}
+
+// Concatenates `base` and `name` with a single '/' in between.
+std::string FS::join(path_t base, path_t name)
+{
+  std::string joined(base);
+  joined += "/";
+  joined += name;
+  return joined;
+}
+
+// Removes `path`. When `path` can be opened as a directory, each of its
+// entries (except "." and "..") is removed recursively before the directory
+// itself; otherwise `path` is handled as a file.
+//
+// The results of the recursive calls are ignored; the returned value is the
+// one produced by removeDir() or removeFile() for `path` itself.
+bool FS::remove(path_t path)
+{
+  DIR* const dir = opendir(path.c_str());
+  if (dir == NULL) {
+    /* It's a file */
+    return removeFile(path);
+  }
+
+  /* It's a directory, remove all its entries first */
+  for (struct dirent* ent = readdir(dir); ent != NULL; ent = readdir(dir)) {
+    const std::string childName = ent->d_name;
+    if (childName == "." || childName == "..") {
+      continue;
+    }
+    remove(join(path, childName));
+  }
+  closedir(dir);
+  return removeDir(path);
+}
+
+// Removes the (empty) directory `path` with rmdir().
+// NOTE(review): the result is the raw rmdir() status converted to bool, so
+// a successful removal (status 0) yields false.
+bool FS::removeDir(path_t path) {
+  const int status = rmdir(path.c_str());
+  return status != 0;
+}
+
+// Removes the file `path` with ::remove().
+// NOTE(review): the result is the raw ::remove() status converted to bool,
+// so a successful removal (status 0) yields false.
+bool FS::removeFile(path_t path) {
+  const int status = ::remove(path.c_str());
+  return status != 0;
+}
+
+
+}; // unix namespace
+
+// Builds the "/dev/fd/<fd>" path designating the file descriptor `fd`.
+std::string getFilePathFromFD(int fd)
+{
+  std::ostringstream path;
+  path << "/dev/fd/";
+  path << fd;
+  return path.str();
+}
+
+}; // zim namespace
+
diff --git a/src/fs_unix.h b/src/fs_unix.h
new file mode 100644
index 0000000..51aab05
--- /dev/null
+++ b/src/fs_unix.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FS_UNIX_H_
+#define ZIM_FS_UNIX_H_
+
+#include "zim_types.h"
+
+#include <stdexcept>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+
+namespace zim {
+
+namespace unix {
+
+// Paths are passed around as read-only references to std::string.
+using path_t = const std::string&;
+
+// Move-only owner of a POSIX file descriptor.
+//
+// An FD holding -1 is empty. The destructor closes the descriptor it still
+// owns; release() gives ownership back to the caller.
+class FD {
+  public:
+    using fd_t = int;
+
+  private:
+    fd_t m_fd = -1;
+
+  public:
+    FD() = default;
+    // Takes ownership of `fd`.
+    FD(fd_t fd):
+      m_fd(fd) {};
+    FD(const FD& o) = delete;
+    // Steals the descriptor of `o`, leaving `o` empty.
+    FD(FD&& o) :
+      m_fd(o.m_fd) { o.m_fd = -1; }
+    // Steals the descriptor of `o`, leaving `o` empty. The descriptor
+    // previously owned by this object (if any) is closed first so that it is
+    // not leaked; self-assignment is a no-op.
+    FD& operator=(FD&& o) {
+      if (this != &o) {
+        close();
+        m_fd = o.m_fd;
+        o.m_fd = -1;
+      }
+      return *this;
+    }
+    ~FD() { close(); }
+    // Reads `size` bytes found at absolute position `offset` into `dest`.
+    zsize_t readAt(char* dest, zsize_t size, offset_t offset) const;
+    // Size of the underlying file.
+    zsize_t getSize() const;
+    // Gives access to the raw descriptor without giving up ownership.
+    fd_t    getNativeHandle() const
+    {
+        return m_fd;
+    }
+    // Returns the raw descriptor and gives up ownership: this object becomes
+    // empty and will not close the descriptor.
+    fd_t    release()
+    {
+        int ret = m_fd;
+        m_fd = -1;
+        return ret;
+    }
+    bool    seek(offset_t offset);
+    bool    close();
+};
+
+// Collection of static filesystem helpers for POSIX platforms.
+// The Windows counterpart (zim::windows::FS) declares the same operations,
+// which lets code be written against the DEFAULTFS alias.
+struct FS {
+    using FD = zim::unix::FD;
+    // Joins two path components with the platform separator.
+    static std::string join(path_t base, path_t name);
+    // Opens a file read-only; throws std::runtime_error on failure.
+    static FD    openFile(path_t filepath);
+    static bool  makeDirectory(path_t path);
+    static void  rename(path_t old_path, path_t new_path);
+    // Removes a file, or a directory together with its content.
+    static bool  remove(path_t path);
+    static bool  removeDir(path_t path);
+    static bool  removeFile(path_t path);
+};
+
+}; // unix namespace
+
+// Returns the "/dev/fd/<fd>" path designating the file descriptor `fd`.
+std::string getFilePathFromFD(int fd);
+
+}; // zim namespace
+
+#endif //ZIM_FS_UNIX_H_
diff --git a/src/fs_windows.cpp b/src/fs_windows.cpp
new file mode 100644
index 0000000..4fe5684
--- /dev/null
+++ b/src/fs_windows.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "fs_windows.h"
+#include <stdexcept>
+
+#include <windows.h>
+#include <winbase.h>
+#include <synchapi.h>
+#include <io.h>
+#include <fileapi.h>
+
+#include <iostream>
+#include <sstream>
+
+namespace zim {
+
+namespace windows {
+
+// Private state of windows::FD: the Win32 file handle and the critical
+// section used to serialize positioned reads on that handle.
+// The critical section lives exactly as long as the object; the handle
+// itself is not closed here.
+struct ImplFD {
+  HANDLE m_handle = INVALID_HANDLE_VALUE;
+  CRITICAL_SECTION m_criticalSection;
+
+  // Empty state: no handle attached.
+  ImplFD() :
+    ImplFD(INVALID_HANDLE_VALUE)
+  {}
+
+  ImplFD(HANDLE handle) :
+    m_handle(handle)
+  {
+    InitializeCriticalSection(&m_criticalSection);
+  }
+
+  ~ImplFD() {
+    DeleteCriticalSection(&m_criticalSection);
+  }
+};
+
+// Creates an FD whose implementation holds no handle
+// (INVALID_HANDLE_VALUE).
+FD::FD() :
+  mp_impl(new ImplFD()) {}
+
+// Creates an FD wrapping `handle`.
+FD::FD(fd_t handle) :
+  mp_impl(new ImplFD(handle)) {}
+
+// Moving transfers the implementation object; the moved-from FD is left
+// with a null mp_impl, which every method checks for.
+// NOTE(review): the defaulted move assignment destroys the previous ImplFD,
+// whose destructor does not close its handle - confirm that no open FD is
+// ever move-assigned over.
+FD::FD(FD&& o) = default;
+FD& FD::operator=(FD&& o) = default;
+
+// Closes the handle, unless this FD was moved from (null implementation).
+FD::~FD()
+{
+  if (!mp_impl) {
+    return;
+  }
+  close();
+}
+
+// Reads exactly `size` bytes located at absolute file position `offset`
+// into `dest`.
+//
+// The handle's file pointer is moved, so the whole seek+read sequence runs
+// inside the critical section of the implementation object.
+// ReadFile() only accepts a 32-bit byte count, so the request is served
+// in chunks; short reads are continued until everything has been read.
+//
+// Returns `size` on success and zsize_t(-1) on failure (moved-from FD, seek
+// or read error, end of file reached before `size` bytes were obtained).
+zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const
+{
+  if (!mp_impl)
+    return zsize_t(-1);
+
+  // Largest amount requested from a single ReadFile() call (1 GiB).
+  const auto maxChunk = static_cast<decltype(size.v)>(0x40000000);
+  auto remaining = size.v;
+  bool ok = true;
+
+  EnterCriticalSection(&mp_impl->m_criticalSection);
+  LARGE_INTEGER off;
+  off.QuadPart = offset.v;
+  if (!SetFilePointerEx(mp_impl->m_handle, off, NULL, FILE_BEGIN)) {
+    ok = false;
+  }
+
+  while (ok && remaining > 0) {
+    const DWORD chunk = static_cast<DWORD>(remaining < maxChunk ? remaining : maxChunk);
+    DWORD size_read = 0;
+    if (!ReadFile(mp_impl->m_handle, dest, chunk, &size_read, NULL)
+        || size_read == 0) {
+      // Read error, or end of file before the request was satisfied.
+      ok = false;
+      break;
+    }
+    // A synchronous ReadFile() advances the file pointer by size_read, so
+    // the next chunk continues where this one stopped.
+    dest += size_read;
+    remaining -= size_read;
+  }
+  LeaveCriticalSection(&mp_impl->m_criticalSection);
+
+  return ok ? size : zsize_t(-1);
+}
+
+// Moves the handle's file pointer to the absolute position `offset`.
+// Returns false for a moved-from FD or when SetFilePointerEx() fails.
+bool FD::seek(offset_t offset)
+{
+  if(!mp_impl)
+    return false;
+  LARGE_INTEGER target;
+  target.QuadPart = offset.v;
+  const BOOL moved = SetFilePointerEx(mp_impl->m_handle, target, NULL, FILE_BEGIN);
+  return moved;
+}
+
+// Returns the size reported by GetFileSizeEx() for the handle, or 0 when
+// the FD was moved from or the size cannot be obtained.
+zsize_t FD::getSize() const
+{
+  LARGE_INTEGER fileSize;
+  if (mp_impl && GetFileSizeEx(mp_impl->m_handle, &fileSize)) {
+    return zsize_t(fileSize.QuadPart);
+  }
+  return zsize_t(0);
+}
+
+// Gives up ownership of the handle: it is wrapped into a C runtime file
+// descriptor with _open_osfhandle(), which is returned, and this FD no
+// longer refers to it. Returns -1 for a moved-from FD.
+int FD::release()
+{
+  if(!mp_impl)
+    return -1;
+  const intptr_t osHandle = reinterpret_cast<intptr_t>(mp_impl->m_handle);
+  mp_impl->m_handle = INVALID_HANDLE_VALUE;
+  return _open_osfhandle(osHandle, 0);
+}
+
+// Closes the handle. Returns true when a handle was owned and CloseHandle()
+// succeeded, false otherwise (moved-from FD, no handle, or close failure).
+//
+// The handle is invalidated afterwards so that a second close() - for
+// instance the one made by the destructor after an explicit close() - does
+// not call CloseHandle() again on a stale handle value.
+bool FD::close()
+{
+  if (!mp_impl || mp_impl->m_handle == INVALID_HANDLE_VALUE) {
+    return false;
+  }
+  const bool closed = CloseHandle(mp_impl->m_handle) != 0;
+  mp_impl->m_handle = INVALID_HANDLE_VALUE;
+  return closed;
+}
+
+// Converts the UTF-8 string `path` into a null-terminated wide-character
+// string suitable for the W variants of the Win32 API.
+// Throws std::runtime_error (with the GetLastError() code) when the
+// conversion fails.
+std::unique_ptr<wchar_t[]> FS::toWideChar(path_t path)
+{
+  const char* const utf8 = path.c_str();
+  // First pass: ask for the required length (terminator included).
+  const auto wideLen = MultiByteToWideChar(CP_UTF8, 0,
+                         utf8, -1, nullptr, 0);
+  std::unique_ptr<wchar_t[]> wide(new wchar_t[wideLen]);
+  // Second pass: perform the conversion.
+  const auto converted = MultiByteToWideChar(CP_UTF8, 0,
+                           utf8, -1, wide.get(), wideLen);
+  if (converted != 0) {
+    return wide;
+  }
+  std::ostringstream oss;
+  oss << "Cannot convert path to wchar : " << GetLastError();
+  throw std::runtime_error(oss.str());
+}
+
+// Opens the existing file `filepath` for reading (shared with other
+// readers, random access hint) and returns the owning FD.
+// Throws std::runtime_error (with the GetLastError() code) on failure.
+FD FS::openFile(path_t filepath)
+{
+  const auto widePath = toWideChar(filepath);
+  const DWORD attributes = FILE_ATTRIBUTE_READONLY|FILE_FLAG_RANDOM_ACCESS;
+  const FD::fd_t handle = CreateFileW(widePath.get(),
+                                      GENERIC_READ,
+                                      FILE_SHARE_READ,
+                                      NULL,
+                                      OPEN_EXISTING,
+                                      attributes,
+                                      NULL);
+  if (handle != INVALID_HANDLE_VALUE) {
+    return FD(handle);
+  }
+  std::ostringstream oss;
+  oss << "Cannot open file : " << GetLastError();
+  throw std::runtime_error(oss.str());
+}
+
+// Creates the directory `path` with default security attributes.
+// Returns true when CreateDirectoryW() succeeds.
+bool FS::makeDirectory(path_t path)
+{
+  const auto widePath = toWideChar(path);
+  return CreateDirectoryW(widePath.get(), NULL);
+}
+
+
+// Moves `old_path` to `new_path`, replacing an existing destination and
+// returning only once the move has been flushed to disk.
+// Throws std::runtime_error when MoveFileExW() fails.
+void FS::rename(path_t old_path, path_t new_path)
+{
+  const auto wideOld = toWideChar(old_path);
+  const auto wideNew = toWideChar(new_path);
+  const DWORD flags = MOVEFILE_REPLACE_EXISTING|MOVEFILE_WRITE_THROUGH;
+  if (MoveFileExW(wideOld.get(), wideNew.get(), flags)) {
+    return;
+  }
+  std::ostringstream message;
+  message << "Cannot move file " << old_path << " to " << new_path;
+  throw std::runtime_error(message.str());
+}
+
+// Concatenates `base` and `name` with a single backslash in between.
+std::string FS::join(path_t base, path_t name)
+{
+  std::string joined(base);
+  joined += "\\";
+  joined += name;
+  return joined;
+}
+
+// Removes the (empty) directory `path`.
+// Returns true when RemoveDirectoryW() succeeds.
+bool FS::removeDir(path_t path)
+{
+  const auto widePath = toWideChar(path);
+  return RemoveDirectoryW(widePath.get());
+}
+
+// Deletes the file `path`.
+// Returns true when DeleteFileW() succeeds.
+bool FS::removeFile(path_t path)
+{
+  const auto widePath = toWideChar(path);
+  return DeleteFileW(widePath.get());
+}
+
+}; // windows namespace
+
+}; // zim namespace
+
diff --git a/src/fs_windows.h b/src/fs_windows.h
new file mode 100644
index 0000000..9e4ae07
--- /dev/null
+++ b/src/fs_windows.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2018 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_FS_WINDOWS_H_
+#define ZIM_FS_WINDOWS_H_
+
+#include "zim_types.h"
+
+#include <stdexcept>
+#include <memory>
+
+// Local declaration of the Win32 HANDLE type, so that this header does not
+// have to include <windows.h> itself.
+typedef void* HANDLE;
+
+namespace zim {
+
+namespace windows {
+
+// Paths are passed around as read-only references to (UTF-8) std::string.
+using path_t = const std::string&;
+
+// Private state of FD; defined in fs_windows.cpp.
+struct ImplFD;
+
+// Move-only wrapper around a Win32 file HANDLE.
+//
+// The actual state lives in an ImplFD object reached through mp_impl, which
+// keeps the Win32 details out of this header.
+class FD {
+  public:
+    typedef HANDLE fd_t;
+  private:
+    std::unique_ptr<ImplFD> mp_impl;
+
+  public:
+    FD();
+    // Wraps `handle`.
+    FD(fd_t handle);
+    FD(const FD& o) = delete;
+    FD(FD&& o);
+    FD& operator=(FD&& o);
+    FD& operator=(const FD& o) = delete;
+    ~FD();
+    // Reads `size` bytes found at absolute position `offset` into `dest`.
+    zsize_t readAt(char* dest, zsize_t size, offset_t offset) const;
+    // Size of the underlying file.
+    zsize_t getSize() const;
+    // Gives up ownership of the handle; returns an int descriptor for it.
+    int     release();
+    bool    seek(offset_t offset);
+    bool    close();
+};
+
+// Collection of static filesystem helpers for Windows.
+// The POSIX counterpart (zim::unix::FS) declares the same operations, which
+// lets code be written against the DEFAULTFS alias.
+struct FS {
+    using FD = zim::windows::FD;
+    // Joins two path components with the platform separator.
+    static std::string join(path_t base, path_t name);
+    // Converts a UTF-8 path to a null-terminated wide-character string.
+    static std::unique_ptr<wchar_t[]> toWideChar(path_t path);
+    // Opens an existing file for reading; throws std::runtime_error on failure.
+    static FD   openFile(path_t filepath);
+    static bool makeDirectory(path_t path);
+    // Throws std::runtime_error on failure.
+    static void rename(path_t old_path, path_t new_path);
+    // NOTE(review): no definition of remove() is visible next to the other
+    // FS methods in fs_windows.cpp - confirm it is defined before using it.
+    static bool remove(path_t path);
+    static bool removeDir(path_t path);
+    static bool removeFile(path_t path);
+};
+
+}; // windows namespace
+
+}; // zim namespace
+
+#endif //ZIM_FS_WINDOWS_H_
diff --git a/src/istreamreader.cpp b/src/istreamreader.cpp
new file mode 100644
index 0000000..9ac4830
--- /dev/null
+++ b/src/istreamreader.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "istreamreader.h"
+#include "buffer_reader.h"
+
+namespace zim
+{
+
+////////////////////////////////////////////////////////////////////////////////
+// IStreamReader
+////////////////////////////////////////////////////////////////////////////////
+
+// Consumes the next `size` bytes of the stream into a freshly allocated
+// buffer and returns a Reader giving access to that buffer.
+std::unique_ptr<const Reader>
+IStreamReader::sub_reader(zsize_t size)
+{
+  auto storage = Buffer::makeBuffer(size);
+  // The buffer only exposes const data; it has just been created here, so
+  // it is filled through a const_cast.
+  char* const writable = const_cast<char*>(storage.data());
+  readImpl(writable, size);
+  return std::unique_ptr<const Reader>(new BufferReader(storage));
+}
+
+} // namespace zim
diff --git a/src/istreamreader.h b/src/istreamreader.h
new file mode 100644
index 0000000..4255d3f
--- /dev/null
+++ b/src/istreamreader.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_IDATASTREAM_H
+#define ZIM_IDATASTREAM_H
+
+#include <exception>
+#include <memory>
+
+#include "endian_tools.h"
+#include "reader.h"
+
+namespace zim
+{
+
+// IStreamReader is a simple interface for sequential iteration over a stream
+// of values of built-in/primitive types and/or opaque binary objects (blobs).
+// An example usage:
+//
+//   void foo(IStreamReader& s)
+//   {
+//     const uint32_t n = s.read<uint32_t>();
+//     for(uint32_t i=0; i < n; ++i)
+//     {
+//        const uint16_t blobSize = s.read<uint16_t>();
+//        auto blob = s.sub_reader(zsize_t(blobSize));
+//        bar(*blob, blobSize);
+//     }
+//   }
+//
+class IStreamReader
+{
+public: // functions
+  virtual ~IStreamReader() = default;
+
+  // Reads a value of the said type from the stream
+  //
+  // For best portability this function should be used with types of known
+  // bit-width (int32_t, uint16_t, etc) rather than builtin types with
+  // unknown bit-width (int, unsigned, etc).
+  template<typename T> T read();
+
+  // Reads a blob of the specified size from the stream and returns a Reader
+  // over it. Subclasses may override this default implementation.
+  virtual std::unique_ptr<const Reader> sub_reader(zsize_t size);
+
+private: // virtual methods
+  // Reads exactly 'nbytes' bytes into the provided buffer 'buf'
+  // (which must be at least that big). Throws an exception if
+  // more bytes are requested than can be retrieved.
+  virtual void readImpl(char* buf, zsize_t nbytes) = 0;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementation of IStreamReader
+////////////////////////////////////////////////////////////////////////////////
+
+// Reads sizeof(T) bytes from the stream and decodes them as a value of
+// type T.
+// XXX: The bytes retrieved via 'readImpl()' are assumed to be encoded
+// XXX: in little-endian form.
+template<typename T>
+inline T
+IStreamReader::read()
+{
+  constexpr size_type valueSize(sizeof(T));
+  char raw[valueSize];
+  readImpl(raw, zsize_t(valueSize));
+  // XXX: This handles only integral types
+  return fromLittleEndian<T>(raw);
+}
+
+} // namespace zim
+
+#endif // ZIM_IDATASTREAM_H
diff --git a/src/item.cpp b/src/item.cpp
new file mode 100644
index 0000000..19a95e9
--- /dev/null
+++ b/src/item.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2021 Veloman Yunkan
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/item.h>
+#include "_dirent.h"
+#include "cluster.h"
+#include "fileimpl.h"
+#include "file_part.h"
+#include "log.h"
+
+log_define("zim.item")
+
+using namespace zim;
+
+// Builds the item for entry `idx` of `file`. The dirent of the entry is
+// fetched once, here, through file->getDirent() and kept in m_dirent for
+// all later queries.
+Item::Item(std::shared_ptr<FileImpl> file, entry_index_type idx)
+  : m_file(file),
+    m_idx(idx),
+    m_dirent(file->getDirent(entry_index_t(idx)))
+{}
+
+// Returns the title stored in the item's dirent.
+std::string Item::getTitle() const
+{
+  const auto& dirent = *m_dirent;
+  return dirent.getTitle();
+}
+
+// Returns the path of the item: the dirent url when the archive uses the
+// new namespace scheme, the dirent "long url" otherwise.
+std::string Item::getPath() const
+{
+  if (!m_file->hasNewNamespaceScheme()) {
+    return m_dirent->getLongUrl();
+  }
+  return m_dirent->getUrl();
+}
+
+// Returns the MIME-type string that the archive associates with the
+// MIME-type value stored in the item's dirent.
+std::string Item::getMimetype() const
+{
+  const auto mimeIdx = m_dirent->getMimeType();
+  return m_file->getMimeType(mimeIdx);
+}
+
+// Returns the item content from `offset` up to the end of the item, i.e.
+// getSize()-offset bytes.
+Blob Item::getData(offset_type offset) const
+{
+  return getData(offset, getSize()-offset);
+}
+
+// Returns `size` bytes of the item content starting at `offset`, taken from
+// the item's blob in its cluster.
+Blob Item::getData(offset_type offset, size_type size) const
+{
+  const auto owningCluster = m_file->getCluster(m_dirent->getClusterNumber());
+  const auto blobIdx = m_dirent->getBlobNumber();
+  return owningCluster->getBlob(blobIdx, offset_t(offset), zsize_t(size));
+}
+
+// Returns the size of the item content, i.e. the size of its blob as
+// reported by its cluster.
+size_type Item::getSize() const
+{
+  const auto owningCluster = m_file->getCluster(m_dirent->getClusterNumber());
+  const auto blobIdx = m_dirent->getBlobNumber();
+  return size_type(owningCluster->getBlobSize(blobIdx));
+}
+
+// Tells where the raw item content can be read directly from disk.
+//
+// Returns (filename, offset inside that file) when the content is stored
+// uncompressed and entirely inside one file part. Returns ("", 0) when the
+// cluster is compressed or when the content spans several parts.
+std::pair<std::string, offset_type> Item::getDirectAccessInformation() const
+{
+  const auto clusterIdx = m_dirent->getClusterNumber();
+  const auto cluster = m_file->getCluster(clusterIdx);
+  if (cluster->isCompressed()) {
+    return std::make_pair("", 0);
+  }
+
+  // Offset of the blob, relative to the start of the (possibly embedded)
+  // archive first, then shifted by the archive start offset.
+  auto full_offset = m_file->getBlobOffset(clusterIdx,
+                                         m_dirent->getBlobNumber());
+  full_offset += m_file->getArchiveStartOffset().v;
+
+  auto parts = m_file->getFileParts(full_offset, zsize_t(getSize()));
+  const auto containing_part = parts.first;
+  ++parts.first;
+  if (parts.first != parts.second) {
+    // The content is split on two parts. We cannot have direct access
+    return std::make_pair("", 0);
+  }
+
+  const auto range = containing_part->first;
+  const auto part = containing_part->second;
+  const offset_type local_offset(full_offset - range.min);
+  return std::make_pair(part->filename(), local_offset);
+}
+
+// Returns the raw index of the cluster recorded in the item's dirent.
+cluster_index_type Item::getClusterIndex() const
+{
+  const auto clusterNumber = m_dirent->getClusterNumber();
+  return clusterNumber.v;
+}
diff --git a/src/log.h b/src/log.h
new file mode 100644
index 0000000..5fbd81a
--- /dev/null
+++ b/src/log.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "config.h"
+
+// Logging front-end: with WITH_CXXTOOLS the log_*() macros come from
+// cxxtools; otherwise they are defined here as empty macros, so logging
+// statements expand to nothing.
+#ifdef WITH_CXXTOOLS
+
+#include <cxxtools/log.h>
+
+#else
+
+// No-op replacements: the argument is discarded without being evaluated.
+#define log_define(e)
+#define log_fatal(e)
+#define log_error(e)
+#define log_warn(e)
+#define log_info(e)
+#define log_debug(e)
+#define log_trace(e)
+#define log_init()
+
+#endif
diff --git a/src/lrucache.h b/src/lrucache.h
new file mode 100644
index 0000000..3389446
--- /dev/null
+++ b/src/lrucache.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2021, Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (c) 2020, Veloman Yunkan
+ * Copyright (c) 2014, lamerman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of lamerman nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * File:   lrucache.hpp
+ * Author: Alexander Ponomarev
+ *
+ * Created on June 20, 2013, 5:09 PM
+ */
+
+#ifndef _LRUCACHE_HPP_INCLUDED_
+#define _LRUCACHE_HPP_INCLUDED_
+
+#include <map>
+#include <list>
+#include <cstddef>
+#include <stdexcept>
+#include <cassert>
+
+namespace zim {
+
+template<typename key_t, typename value_t>
+class lru_cache {
+public: // types
+  typedef typename std::pair<key_t, value_t> key_value_pair_t;
+  typedef typename std::list<key_value_pair_t>::iterator list_iterator_t;
+
+  enum AccessStatus {
+    HIT, // key was found in the cache
+    PUT, // key was not in the cache but was created by the getOrPut() access
+    MISS // key was not in the cache; get() access failed
+  };
+
+  class AccessResult
+  {
+    const AccessStatus status_;
+    const value_t val_;
+  public:
+    AccessResult(const value_t& val, AccessStatus status)
+      : status_(status), val_(val)
+    {}
+    AccessResult() : status_(MISS), val_() {}
+
+    bool hit() const { return status_ == HIT; }
+    bool miss() const { return !hit(); }
+    const value_t& value() const
+    {
+      if ( status_ == MISS )
+        throw std::range_error("There is no such key in cache");
+      return val_;
+    }
+
+    operator const value_t& () const { return value(); }
+  };
+
+public: // functions
+  explicit lru_cache(size_t max_size) :
+    _max_size(max_size) {
+  }
+
+  // If 'key' is present in the cache, returns the associated value,
+  // otherwise puts the given value into the cache (and returns it with
+  // a status of a cache miss).
+  AccessResult getOrPut(const key_t& key, const value_t& value) {
+    auto it = _cache_items_map.find(key);
+    if (it != _cache_items_map.end()) {
+      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
+      return AccessResult(it->second->second, HIT);
+    } else {
+      putMissing(key, value);
+      return AccessResult(value, PUT);
+    }
+  }
+
+  void put(const key_t& key, const value_t& value) {
+    auto it = _cache_items_map.find(key);
+    if (it != _cache_items_map.end()) {
+      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
+      it->second->second = value;
+    } else {
+      putMissing(key, value);
+    }
+  }
+
+  AccessResult get(const key_t& key) {
+    auto it = _cache_items_map.find(key);
+    if (it == _cache_items_map.end()) {
+      return AccessResult();
+    } else {
+      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
+      return AccessResult(it->second->second, HIT);
+    }
+  }
+
+  // Removes 'key' from the cache. Returns true if an entry was removed,
+  // false if 'key' was not cached.
+  bool drop(const key_t& key) {
+    const auto it = _cache_items_map.find(key);  // single lookup, no exception on a miss
+    if (it == _cache_items_map.end())
+      return false;
+    _cache_items_list.erase(it->second);
+    _cache_items_map.erase(it);
+    return true;
+  }
+
+  bool exists(const key_t& key) const {
+    return _cache_items_map.find(key) != _cache_items_map.end();
+  }
+
+  size_t size() const {
+    return _cache_items_map.size();
+  }
+
+private: // functions
+  void putMissing(const key_t& key, const value_t& value) {
+    assert(_cache_items_map.find(key) == _cache_items_map.end());
+    _cache_items_list.push_front(key_value_pair_t(key, value));
+    _cache_items_map[key] = _cache_items_list.begin();
+    if (_cache_items_map.size() > _max_size) {
+      _cache_items_map.erase(_cache_items_list.back().first);
+      _cache_items_list.pop_back();
+    }
+  }
+
+private: // data
+  std::list<key_value_pair_t> _cache_items_list;
+  std::map<key_t, list_iterator_t> _cache_items_map;
+  size_t _max_size;
+};
+
+} // namespace zim
+
+#endif  /* _LRUCACHE_HPP_INCLUDED_ */
diff --git a/src/md5.c b/src/md5.c
new file mode 100644
index 0000000..bae002e
--- /dev/null
+++ b/src/md5.c
@@ -0,0 +1,340 @@
+/* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
+ */
+
+/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+rights reserved.
+
+License to copy and use this software is granted provided that it
+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+Algorithm" in all material mentioning or referencing this software
+or this function.
+
+License is also granted to make and use derivative works provided
+that such works are identified as "derived from the RSA Data
+Security, Inc. MD5 Message-Digest Algorithm" in all material
+mentioning or referencing the derived work.
+
+RSA Data Security, Inc. makes no representations concerning either
+the merchantability of this software or the suitability of this
+software for any particular purpose. It is provided "as is"
+without express or implied warranty of any kind.
+
+These notices must be retained in any copies of any part of this
+documentation and/or software.
+ */
+
+#include "md5.h"
+#include <string.h>
+
+#define MD5_CTX struct zim_MD5_CTX
+
+/* Constants for MD5Transform routine.
+ */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+static void MD5Transform PROTO_LIST ((UINT4 [4], const unsigned char [64]));
+static void Encode PROTO_LIST
+  ((unsigned char *, UINT4 *, unsigned int));
+static void Decode PROTO_LIST
+  ((UINT4 *, const unsigned char *, unsigned int));
+/*
+static void MD5_memcpy PROTO_LIST ((POINTER, POINTER, unsigned int));
+static void MD5_memset PROTO_LIST ((POINTER, int, unsigned int));
+*/
+#define MD5_memcpy memcpy
+#define MD5_memset memset
+
+static unsigned char PADDING[64] = {
+  0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/* F, G, H and I are basic MD5 functions.
+ */
+#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
+#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | (~z)))
+
+/* ROTATE_LEFT rotates x left n bits.
+ */
+#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+
+/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
+Rotation is separate from addition to prevent recomputation.
+ */
+#define FF(a, b, c, d, x, s, ac) { \
+ (a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) { \
+ (a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) { \
+ (a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) { \
+ (a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
+ (a) = ROTATE_LEFT ((a), (s)); \
+ (a) += (b); \
+  }
+
+/* MD5 initialization. Begins an MD5 operation, writing a new context.
+ */
+void zim_MD5Init (MD5_CTX* context)
+{
+  context->count[0] = context->count[1] = 0;
+  /* Load magic initialization constants.
+*/
+  context->state[0] = 0x67452301;
+  context->state[1] = 0xefcdab89;
+  context->state[2] = 0x98badcfe;
+  context->state[3] = 0x10325476;
+}
+
+/* MD5 block update operation. Continues an MD5 message-digest
+  operation, processing another message block, and updating the
+  context.
+ */
+void zim_MD5Update (
+MD5_CTX *context,
+const unsigned char *input,                          /* input block */
+unsigned int inputLen)                     /* length of input block */
+{
+  unsigned int i, index, partLen;
+
+  /* Compute number of bytes mod 64 */
+  index = (unsigned int)((context->count[0] >> 3) & 0x3F);
+
+  /* Update number of bits */
+  if ((context->count[0] += ((UINT4)inputLen << 3))
+   < ((UINT4)inputLen << 3))
+ context->count[1]++;
+  context->count[1] += ((UINT4)inputLen >> 29);
+
+  partLen = 64 - index;
+
+  /* Transform as many times as possible.
+*/
+  if (inputLen >= partLen) {
+ MD5_memcpy
+   ((POINTER)&context->buffer[index], (POINTER)input, partLen);
+ MD5Transform (context->state, context->buffer);
+
+ for (i = partLen; i + 63 < inputLen; i += 64)
+   MD5Transform (context->state, &input[i]);
+
+ index = 0;
+  }
+  else
+ i = 0;
+
+  /* Buffer remaining input */
+  MD5_memcpy
+ ((POINTER)&context->buffer[index], (POINTER)&input[i],
+  inputLen-i);
+}
+
+/* MD5 finalization. Ends an MD5 message-digest operation, writing the
+  message digest and zeroizing the context.
+ */
+void zim_MD5Final (
+unsigned char digest[16],                         /* message digest */
+MD5_CTX *context)                                       /* context */
+{
+  unsigned char bits[8];
+  unsigned int index, padLen;
+
+  /* Save number of bits */
+  Encode (bits, context->count, 8);
+
+  /* Pad out to 56 mod 64.
+*/
+  index = (unsigned int)((context->count[0] >> 3) & 0x3f);
+  padLen = (index < 56) ? (56 - index) : (120 - index);
+  zim_MD5Update (context, PADDING, padLen);
+
+  /* Append length (before padding) */
+  zim_MD5Update (context, bits, 8);
+  /* Store state in digest */
+  Encode (digest, context->state, 16);
+
+  /* Zeroize sensitive information.
+*/
+  MD5_memset ((POINTER)context, 0, sizeof (*context));
+}
+
+/* MD5 basic transformation. Transforms state based on block.
+ */
+static void MD5Transform (
+UINT4 state[4],
+const unsigned char block[64])
+{
+  UINT4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+
+  Decode (x, block, 64);
+
+  /* Round 1 */
+  FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
+  FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
+  FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
+  FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
+  FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
+  FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
+  FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
+  FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
+  FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
+  FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
+  FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
+  FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
+  FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
+  FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
+  FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
+  FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */
+
+ /* Round 2 */
+  GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
+  GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
+  GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
+  GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
+  GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
+  GG (d, a, b, c, x[10], S22,  0x2441453); /* 22 */
+  GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
+  GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
+  GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
+  GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
+  GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
+  GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
+  GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
+  GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
+  GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
+  GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */
+
+  /* Round 3 */
+  HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
+  HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
+  HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
+  HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
+  HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
+  HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
+  HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
+  HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
+  HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
+  HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
+  HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
+  HH (b, c, d, a, x[ 6], S34,  0x4881d05); /* 44 */
+  HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
+  HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
+  HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
+  HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */
+
+  /* Round 4 */
+  II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
+  II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
+  II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
+  II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
+  II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
+  II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
+  II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
+  II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
+  II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
+  II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
+  II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
+  II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
+  II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
+  II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
+  II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
+  II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */
+
+  state[0] += a;
+  state[1] += b;
+  state[2] += c;
+  state[3] += d;
+
+  /* Zeroize sensitive information.
+*/
+  MD5_memset ((POINTER)x, 0, sizeof (x));
+}
+
+/* Encodes input (UINT4) into output (unsigned char). Assumes len is
+  a multiple of 4.
+ */
+static void Encode (
+unsigned char *output,
+UINT4 *input,
+unsigned int len)
+{
+  unsigned int i, j;
+
+  for (i = 0, j = 0; j < len; i++, j += 4) {
+ output[j] = (unsigned char)(input[i] & 0xff);
+ output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
+ output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
+ output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
+  }
+}
+
+/* Decodes input (unsigned char) into output (UINT4). Assumes len is
+  a multiple of 4.
+ */
+static void Decode (
+UINT4 *output,
+const unsigned char *input,
+unsigned int len)
+{
+  unsigned int i, j;
+
+  for (i = 0, j = 0; j < len; i++, j += 4)
+ output[i] = ((UINT4)input[j]) | (((UINT4)input[j+1]) << 8) |
+   (((UINT4)input[j+2]) << 16) | (((UINT4)input[j+3]) << 24);
+}
+
+#if 0
+/* Note: Replace "for loop" with standard memcpy if possible.
+ */
+
+static void MD5_memcpy (
+POINTER output,
+POINTER input,
+unsigned int len)
+{
+  unsigned int i;
+
+  for (i = 0; i < len; i++)
+ output[i] = input[i];
+}
+
+/* Note: Replace "for loop" with standard memset if possible.
+ */
+static void MD5_memset (
+POINTER output,
+int value,
+unsigned int len)
+{
+  unsigned int i;
+
+  for (i = 0; i < len; i++)
+ ((char *)output)[i] = (char)value;
+}
+#endif
diff --git a/src/md5.h b/src/md5.h
new file mode 100644
index 0000000..29bdc39
--- /dev/null
+++ b/src/md5.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2003 Tommi Maekitalo
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * 
+ * As a special exception, you may use this file as part of a free
+ * software library without restriction. Specifically, if other files
+ * instantiate templates or use macros or inline functions from this
+ * file, or you compile this file and link it with other files to
+ * produce an executable, this file does not by itself cause the
+ * resulting executable to be covered by the GNU General Public
+ * License. This exception does not however invalidate any other
+ * reasons why the executable file might be covered by the GNU Library
+ * General Public License.
+ * 
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+rights reserved.
+
+License to copy and use this software is granted provided that it
+is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+Algorithm" in all material mentioning or referencing this software
+or this function.
+
+License is also granted to make and use derivative works provided
+that such works are identified as "derived from the RSA Data
+Security, Inc. MD5 Message-Digest Algorithm" in all material
+mentioning or referencing the derived work.
+
+RSA Data Security, Inc. makes no representations concerning either
+the merchantability of this software or the suitability of this
+software for any particular purpose. It is provided "as is"
+without express or implied warranty of any kind.
+
+These notices must be retained in any copies of any part of this
+documentation and/or software.
+ */
+
+/* RSAREF types and constants
+ */
+
+/* PROTOTYPES should be set to one if and only if the compiler supports
+  function argument prototyping.
+The following makes PROTOTYPES default to 0 if it has not already
+  been defined with C compiler flags.
+ */
+
+#ifndef ZIM_MD5_H
+#define ZIM_MD5_H
+
+#ifndef PROTOTYPES
+#define PROTOTYPES 1
+#endif
+
+/* POINTER defines a generic pointer type */
+typedef unsigned char *POINTER;
+
+/* UINT2 defines a two byte word */
+typedef unsigned short int UINT2;
+
+/* UINT4 defines a four byte word */
+typedef unsigned int UINT4;
+
+/* PROTO_LIST is defined depending on how PROTOTYPES is defined above.
+   If using PROTOTYPES, then PROTO_LIST returns the list, otherwise it
+  returns an empty list.
+ */
+
+#if PROTOTYPES
+#define PROTO_LIST(list) list
+#else
+#define PROTO_LIST(list) ()
+#endif
+
+/* MD5 context. */
+struct zim_MD5_CTX {
+  UINT4 state[4];                                   /* state (ABCD) */
+  UINT4 count[2];        /* number of bits, modulo 2^64 (lsb first) */
+  unsigned char buffer[64];                         /* input buffer */
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void zim_MD5Init PROTO_LIST ((struct zim_MD5_CTX *));
+void zim_MD5Update PROTO_LIST
+  ((struct zim_MD5_CTX *, const unsigned char *, unsigned int));
+void zim_MD5Final PROTO_LIST ((unsigned char [16], struct zim_MD5_CTX *));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZIM_MD5_H */
diff --git a/src/meson.build b/src/meson.build
new file mode 100644
index 0000000..4529b7c
--- /dev/null
+++ b/src/meson.build
@@ -0,0 +1,82 @@
+
+configure_file(output : 'config.h',
+               configuration : private_conf,
+               input : 'config.h.in')
+
+src_directory = include_directories('.')
+
+common_sources = [
+#    'config.h',
+    'archive.cpp',
+    'cluster.cpp',
+    'buffer_reader.cpp',
+    'dirent.cpp',
+    'dirent_accessor.cpp',
+    'entry.cpp',
+    'envvalue.cpp',
+    'fileheader.cpp',
+    'fileimpl.cpp',
+    'file_compound.cpp',
+    'file_reader.cpp',
+    'item.cpp',
+    'blob.cpp',
+    'buffer.cpp',
+    'md5.c',
+    'template.cpp',
+    'uuid.cpp',
+    'tools.cpp',
+    'compression.cpp',
+    'istreamreader.cpp',
+    'writer/contentProvider.cpp',
+    'writer/creator.cpp',
+    'writer/item.cpp',
+    'writer/cluster.cpp',
+    'writer/dirent.cpp',
+    'writer/workers.cpp',
+    'writer/clusterWorker.cpp',
+    'writer/titleListingHandler.cpp',
+    'writer/counterHandler.cpp',
+    'suggestion.cpp',
+    'suggestion_iterator.cpp',
+    'version.cpp'
+]
+
+if host_machine.system() == 'windows'
+    common_sources += 'fs_windows.cpp'
+else
+    common_sources += 'fs_unix.cpp'
+endif
+
+xapian_sources = [
+    'search.cpp',
+    'search_iterator.cpp',
+    'xapian/htmlparse.cc',
+    'xapian/myhtmlparse.cc',
+    'writer/xapianIndexer.cpp',
+    'writer/xapianWorker.cpp',
+    'writer/xapianHandler.cpp'
+]
+
+sources = common_sources
+deps = [thread_dep, lzma_dep, zstd_dep]
+
+if target_machine.system() == 'freebsd'
+    deps += [execinfo_dep]
+endif
+
+if xapian_dep.found()
+    sources += xapian_sources
+    sources += lib_resources
+    deps += [xapian_dep, icu_dep]
+endif
+
+libzim = library('zim',
+                 sources,
+                 include_directories : inc,
+                 dependencies : deps,
+                 link_args : extra_link_args,
+                 cpp_args : extra_cpp_args,
+                 version: meson.project_version(),
+                 install : true)
+libzim_dep = declare_dependency(link_with: libzim,
+                                include_directories: include_directory)
diff --git a/src/narrowdown.h b/src/narrowdown.h
new file mode 100644
index 0000000..361a078
--- /dev/null
+++ b/src/narrowdown.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_NARROWDOWN_H
+#define ZIM_NARROWDOWN_H
+
+#include "zim_types.h"
+#include "debug.h"
+
+#include <algorithm>
+#include <vector>
+
+#include <zim/error.h>
+
+namespace zim
+{
+
+// Given a sorted sequence of items with a string key, NarrowDown helps to
+// narrow down the range in which the query key should belong.
+//
+// The target usage of this class is as a partial in-memory index for a sorted
+// list residing in external storage with high access cost to individual items.
+//
+// Illustration:
+//
+// In RAM:
+//   key:        A       I       Q       Y       g       o       w  z
+//   item #:     |       |       |       |       |       |       |  |
+// -----------   |       |       |       |       |       |       |  |
+// On disk:      V       V       V       V       V       V       V  V
+//   key:        ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
+//   data:       ajo097124ljp-oasd)(&(*)llkjasdf@$^nFDSs00ujlasdfjkll
+//
+// In such an external list looking up an item by key can be performed via a
+// binary search where on each iteration the item key must be accessed. There
+// are two performance problems with that:
+//  1. The API may not allow accessing only the key of the given item, reading
+//     the entire item instead (this is the case with dirents).
+//  2. Access to items (or only their keys) in external storage is expensive.
+//
+// NarrowDown speeds up the look-up operation in such an external list by
+// allowing to split it into two steps:
+//  1. Perform the binary search on the index, yielding a narrower range
+//  2. Perform the binary search on the external list starting from that
+//     narrower range.
+//
+// The denser the in-memory index the more the performance improvement.
+// Therefore the implementation focus of NarrowDown is on small memory
+// footprint. If the item keys are long strings with a lot of "garbage" at the
+// end the following trick helps. Suppose that we have the following pair of
+// adjacent keys in our full (external) list:
+//
+// Item # | Key
+// ---------------------------------
+// ...    | ...
+// 1234   | "We Are The Champions"
+// 1235   | "We Will Rock You"
+// ...    | ...
+//
+// If we were to include the item #1234 in our index the naive approach would
+// be to store its key as is. However, let's imagine that the list also
+// contains an item with key "We W". Then it would have to reside between "We
+// Are The Champions" and "We Will Rock You". So we can pretend that such an
+// item exists and store in our index the fictitious entry {"We W", 1234.5}.
+// When we arrive at that entry during the range narrow-down step we must round
+// the item index downward if it is going to be used as the lower bound of
+// the range, and round it upward if it is going to be used as the upper bound
+// of the range.
+class NarrowDown
+{
+  typedef entry_index_type index_type;
+
+public: // types
+  struct Range
+  {
+    const index_type begin, end;
+  };
+
+public: // functions
+  NarrowDown()
+    : pred(&keyContentArea)
+  {}
+
+  // Add another entry to the search index. The key of the next item is used
+  // to derive and store a shorter pseudo-key as explained in the long comment
+  // above the class.
+  void add(const std::string& key, index_type i, const std::string& nextKey)
+  {
+    // Ideally we would reject `key >= nextKey`, but very old ZIM files were not required to
+    // have unique urls: entries only had to be sorted by url, so two entries could share one.
+    // That was a bug and has since been fixed, but we still have to be tolerant here and accept
+    // that two consecutive keys can be equal.
+    if (key > nextKey) {
+      std::stringstream ss;
+      ss << "Dirent table is not properly sorted:\n";
+      ss << "  #" << i << ": " << key[0] << "/" << key.substr(1) << "\n";
+      ss << "  #" << i+1 << ": " << nextKey[0] << "/" << nextKey.substr(1);
+      throw ZimFileFormatError(ss.str());
+    }
+    if ( entries.empty() ) {
+      addEntry(key, i);
+    }
+    else
+    {
+      const std::string pseudoKey = shortestStringInBetween(key, nextKey);
+      if (pred(pseudoKey, entries.back())) {
+        std::stringstream ss;
+        ss << "Dirent table is not properly sorted:\n";
+        ss << "PseudoKey " << pseudoKey << " should be after (or equal) previously generated " << pred.getKeyContent(entries.back()) << "\n";
+        throw ZimFileFormatError(ss.str());
+      }
+      ASSERT(entries.back().lindex, <, i);
+      addEntry(pseudoKey, i);
+    }
+  }
+
+  void close(const std::string& key, index_type i)
+  {
+    ASSERT(entries.empty() || pred(entries.back(), key), ==, true);
+    ASSERT(entries.empty() || entries.back().lindex < i, ==, true);
+    addEntry(key, i);
+  }
+
+  Range getRange(const std::string& key) const
+  {
+    auto it = std::upper_bound(entries.begin(), entries.end(), key, pred);
+    if ( it == entries.begin() )
+      return {0, 0};
+
+    const index_type prevEntryLindex = (it-1)->lindex;
+
+    if ( it == entries.end() )
+      return {prevEntryLindex, prevEntryLindex+1};
+
+    return {prevEntryLindex, it->lindex+1};
+  }
+
+  static std::string shortestStringInBetween(const std::string& a, const std::string& b)
+  {
+    ASSERT(a, <=, b);
+    // Result: the prefix of `b` ending one char past its common prefix with `a` (all of `b` when a == b).
+    // MSVC's 3-iterator `std::mismatch(begin1, end1, begin2)` needs `begin2 + (end1-begin1)`
+    // to be valid, so the first range is clamped to the common length instead of using `a.end()`.
+    const auto minlen = std::min(a.size(), b.size());
+    const auto m = std::mismatch(a.begin(), a.begin()+minlen, b.begin());
+    // m.second is b.end() when `b` was fully matched (e.g. a == b); never step past the end.
+    return std::string(b.begin(), m.second == b.end() ? m.second : m.second+1);
+  }
+
+private: // functions
+  void addEntry(const std::string& s, index_type i)
+  {
+    entries.push_back({uint32_t(keyContentArea.size()), i});
+    keyContentArea.insert(keyContentArea.end(), s.begin(), s.end());
+    keyContentArea.push_back('\0');
+  }
+
+private: // types
+  typedef std::vector<char> KeyContentArea;
+
+  struct Entry
+  {
+    // This is mostly a truncated version of a key from the input sequence.
+    // The exceptions are
+    //   - the first item
+    //   - the last item
+    //   - keys that differ from their preceding key only in the last character
+    //
+    // std::string pseudoKey; // std::string has too much memory overhead.
+    uint32_t pseudoKeyOffset; // Instead we densely pack the key contents
+                              // into keyContentArea and store in the entry
+                              // the offset into that container.
+
+    // This represents the index of the item in the input sequence right
+    // after which pseudoKey might be inserted without breaking the sequence
+    // order. In other words, the condition
+    //
+    //    sequence[lindex] <= pseudoKey <= sequence[lindex+1]
+    //
+    // must be true.
+    index_type  lindex;
+  };
+
+  struct LookupPred
+  {
+    const KeyContentArea& keyContentArea;
+
+    explicit LookupPred(const KeyContentArea* kca)
+      : keyContentArea(*kca)
+    {}
+
+    const char* getKeyContent(const Entry& entry) const
+    {
+      return &keyContentArea[entry.pseudoKeyOffset];
+    }
+
+    bool operator()(const Entry& entry, const std::string& key) const
+    {
+      return key.compare(getKeyContent(entry)) >= 0;
+    }
+
+    bool operator()(const std::string& key, const Entry& entry) const
+    {
+      return key.compare(getKeyContent(entry)) < 0;
+    }
+  };
+
+  typedef std::vector<Entry> EntryCollection;
+
+private: // data
+  // Used to store the (shortened) keys as densely packed C-style strings
+  KeyContentArea keyContentArea;
+
+  LookupPred pred;
+
+  EntryCollection entries;
+};
+
+} // namespace zim
+
+#endif // ZIM_NARROWDOWN_H
diff --git a/src/rawstreamreader.h b/src/rawstreamreader.h
new file mode 100644
index 0000000..43596fc
--- /dev/null
+++ b/src/rawstreamreader.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_RAWSTREAMREADER_H
+#define ZIM_RAWSTREAMREADER_H
+
+#include "istreamreader.h"
+#include "reader.h"
+#include "debug.h"
+
+namespace zim
+{
+
+class RawStreamReader : public IStreamReader
+{
+public: // functions
+  explicit RawStreamReader(std::shared_ptr<const zim::Reader> reader)
+    : m_reader(reader), m_readerPos(0) {}
+
+  // Reads the next nbytes of the underlying reader into buf, then advances
+  // the current position past them.
+  void readImpl(char* buf, zsize_t nbytes) override
+  {
+    m_reader->read(buf, m_readerPos, nbytes);
+    m_readerPos += nbytes;
+  }
+
+  // Returns the next nbytes as a sub-reader of the underlying reader and
+  // advances the current position past them.
+  std::unique_ptr<const Reader> sub_reader(zsize_t nbytes) override
+  {
+    auto nextChunk = m_reader->sub_reader(m_readerPos, nbytes);
+    m_readerPos += nbytes;
+    return nextChunk;
+  }
+
+private: // data
+  std::shared_ptr<const Reader> m_reader; // the reader being consumed
+  offset_t m_readerPos;                   // offset of the next unread byte
+};
+
+} // namespace zim
+
+#endif // ZIM_RAWSTREAMREADER_H
diff --git a/src/reader.h b/src/reader.h
new file mode 100644
index 0000000..767b5e2
--- /dev/null
+++ b/src/reader.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2017-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_READER_H_
+#define ZIM_READER_H_
+
+#include <memory>
+
+#include "zim_types.h"
+#include "endian_tools.h"
+#include "debug.h"
+
+#include "buffer.h"
+
+namespace zim {
+
+class Reader { // Abstract read-only view over a range of bytes, addressed by offset.
+  public:
+    Reader() {};
+    virtual zsize_t size() const = 0; // presumably the number of bytes exposed by this reader
+    virtual ~Reader() {};
+
+    virtual void read(char* dest, offset_t offset, zsize_t size) const = 0; // bulk read into `dest`
+    template<typename T>
+    T read_uint(offset_t offset) const { // decodes a little-endian T stored at `offset`
+      ASSERT(offset.v, <, size().v);
+      ASSERT(offset.v+sizeof(T), <=, size().v); // all sizeof(T) bytes must lie inside the reader
+      char tmp_buf[sizeof(T)];
+      read(tmp_buf, offset, zsize_t(sizeof(T)));
+      return fromLittleEndian<T>(tmp_buf);
+    }
+    virtual char read(offset_t offset) const = 0; // single-byte variant of read()
+
+    virtual const Buffer get_buffer(offset_t offset, zsize_t size) const = 0;
+    const Buffer get_buffer(offset_t offset) const { // overload covering `offset` up to size()
+      return get_buffer(offset, zsize_t(size().v-offset.v)); // NOTE(review): offset <= size() is not checked here
+    }
+    virtual std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const = 0;
+    std::unique_ptr<const Reader> sub_reader(offset_t offset) const { // overload covering `offset` up to size()
+      return sub_reader(offset, zsize_t(size().v-offset.v)); // NOTE(review): offset <= size() is not checked here
+    }
+    virtual offset_t offset() const = 0;
+
+    bool can_read(offset_t offset, zsize_t size) const; // defined out of line
+};
+
+};
+
+#endif // ZIM_READER_H_
diff --git a/src/search.cpp b/src/search.cpp
new file mode 100644
index 0000000..e0ed54d
--- /dev/null
+++ b/src/search.cpp
@@ -0,0 +1,348 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2021 Veloman Yunkan
+ * Copyright (C) 2020 Emmanuel Engelhart <kelson@kiwix.org>
+ * Copyright (C) 2018 Kunal Mehta <legoktm@member.fsf.org>
+ * Copyright (C) 2007 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/search.h>
+#include <zim/archive.h>
+#include <zim/item.h>
+#include "fileimpl.h"
+#include "search_internal.h"
+#include "fs.h"
+#include "tools.h"
+
+#include <sstream>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#if !defined(_WIN32)
+# include <unistd.h>
+#else
+# include <io.h>
+#endif
+#include <errno.h>
+
+#include "xapian.h"
+#include <unicode/locid.h>
+
+#include "constants.h"
+
+#define MAX_MATCHES_TO_SORT 10000
+
+namespace zim
+{
+
+InternalDataBase::InternalDataBase(const std::vector<Archive>& archives, bool verbose)
+  : m_verbose(verbose)
+{   // Collects the full-text xapian database of each archive that has one into m_database.
+    bool first = true;  // true until the first usable database has been added
+    m_queryParser.set_database(m_database);
+    m_queryParser.set_default_op(Xapian::Query::op::OP_AND);
+
+    for(auto& archive: archives) {
+        auto impl = archive.getImpl();
+        FileImpl::FindxResult r;
+        r = impl->findx('X', "fulltext/xapian");
+        if (!r.first) {
+          r = impl->findx('Z', "/fulltextIndex/xapian");  // second location tried for the index
+        }
+        if (!r.first) {
+            continue;  // no full-text index entry in this archive
+        }
+        auto xapianEntry = Entry(impl, entry_index_type(r.second));
+        auto accessInfo = xapianEntry.getItem().getDirectAccessInformation();
+        if (accessInfo.second == 0) {
+            continue;  // presumably the item is not directly accessible in the file -- confirm
+        }
+
+        Xapian::Database database;
+        if (!getDbFromAccessInfo(accessInfo, database)) {
+          continue;  // getDbFromAccessInfo reported failure: ignore this archive
+        }
+
+        if ( first ) {  // the first database provides valuesmap, stemmer and stopwords
+            m_valuesmap = read_valuesmap(database.get_metadata("valuesmap"));
+            auto language = database.get_metadata("language");
+            if (language.empty() ) {
+                // Databases created before 2017/03 have no language metadata.
+                // However, terms were stemmed anyway and we need to stem our
+                // search query the same way the database was created.
+                // So we need a language; let's use the one of the zim.
+                // If the zim file has no language metadata, we can't do much more here :/
+                try {
+                    language = archive.getMetadata("Language");
+                } catch(...) {}  // best effort: continue without a language
+            }
+            if (!language.empty()) {
+                icu::Locale languageLocale(language.c_str());
+                /* Configuring language based stemming */
+                try {
+                    m_stemmer = Xapian::Stem(languageLocale.getLanguage());
+                    m_queryParser.set_stemmer(m_stemmer);
+                    m_queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
+                } catch (...) {
+                    std::cout << "No stemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
+                }
+            }
+            auto stopwords = database.get_metadata("stopwords");
+            if ( !stopwords.empty() ){
+                std::string stopWord;
+                std::istringstream file(stopwords);  // the metadata holds one stop word per line
+                Xapian::SimpleStopper* stopper = new Xapian::SimpleStopper();
+                while (std::getline(file, stopWord, '\n')) {
+                    stopper->add(stopWord);
+                }
+                stopper->release();  // presumably hands ownership to xapian's reference counting -- see xapian docs
+                m_queryParser.set_stopper(stopper);
+            }
+        } else {
+            std::map<std::string, int> valuesmap = read_valuesmap(database.get_metadata("valuesmap"));
+            if (m_valuesmap != valuesmap ) {
+                // [TODO] Ignore the database, raise an error?
+            }
+        }
+        m_xapianDatabases.push_back(database);
+        m_database.add_database(database);
+        m_archives.push_back(archive);  // kept parallel to m_xapianDatabases (same index)
+        first = false;
+    }
+}
+
+bool InternalDataBase::hasDatabase() const
+{
+  return !m_xapianDatabases.empty();
+}
+
+bool InternalDataBase::hasValuesmap() const
+{
+  return !m_valuesmap.empty();
+}
+
+bool InternalDataBase::hasValue(const std::string& valueName) const
+{
+  return m_valuesmap.count(valueName) != 0;  // true when valueName is a key of the valuesmap
+}
+
+int InternalDataBase::valueSlot(const std::string& valueName) const
+{
+  return m_valuesmap.at(valueName);
+}
+
+Xapian::Query InternalDataBase::parseQuery(const Query& query)
+{
+  // Text part of the query, as understood by the configured query parser.
+  Xapian::Query textQuery = m_queryParser.parse_query(query.m_query);
+
+  // No geo request, or no "geo.position" value in the database: text only.
+  if (!query.m_geoquery || !hasValue("geo.position")) {
+    return textQuery;
+  }
+
+  Xapian::GreatCircleMetric metric;
+  Xapian::LatLongCoord centre(query.m_latitude, query.m_longitude);
+  Xapian::LatLongDistancePostingSource ps(valueSlot("geo.position"), centre, metric, query.m_distance);
+  Xapian::Query geoQuery(&ps);
+  // An empty text query means "geo only"; otherwise the geo query filters the text query.
+  if (query.m_query.empty()) {
+    return geoQuery;
+  }
+  return Xapian::Query(Xapian::Query::OP_FILTER, textQuery, geoQuery);
+}
+
+Searcher::Searcher(const std::vector<Archive>& archives) :
+    mp_internalDb(nullptr),
+    m_verbose(false)
+{
+    for ( const auto& a : archives ) {
+        addArchive(a);
+    }
+}
+
+Searcher::Searcher(const Archive& archive) :
+    mp_internalDb(nullptr),
+    m_verbose(false)
+{
+    addArchive(archive);
+}
+
+Searcher::Searcher(const Searcher& other) = default;
+Searcher& Searcher::operator=(const Searcher& other) = default;
+Searcher::Searcher(Searcher&& other) = default;
+Searcher& Searcher::operator=(Searcher&& other) = default;
+Searcher::~Searcher() = default;
+
+namespace
+{
+
+bool archivesAreEquivalent(const Archive& a1, const Archive& a2)
+{
+  return a1.getUuid() == a2.getUuid();
+}
+
+bool contains(const std::vector<Archive>& archives, const Archive& newArchive)
+{
+    for ( const auto& a : archives ) {
+        if ( archivesAreEquivalent(a, newArchive) ) {
+            return true;
+        }
+    }
+    return false;
+}
+
+} // unnamed namespace
+
+Searcher& Searcher::addArchive(const Archive& archive) {
+    if ( !contains(m_archives, archive) ) {
+        m_archives.push_back(archive);
+        mp_internalDb.reset();
+    }
+    return *this;
+}
+
+Search Searcher::search(const Query& query)
+{
+  if (!mp_internalDb) {
+    initDatabase();
+  }
+
+  if (!mp_internalDb->hasDatabase()) {
+    throw(std::runtime_error("Cannot create Search without FT Xapian index"));
+  }
+
+  return Search(mp_internalDb, query);
+}
+
+void Searcher::setVerbose(bool verbose)
+{
+  m_verbose = verbose;
+}
+
+void Searcher::initDatabase()
+{
+    mp_internalDb = std::make_shared<InternalDataBase>(m_archives, m_verbose);
+}
+
+Search::Search(std::shared_ptr<InternalDataBase> p_internalDb, const Query& query)
+ : mp_internalDb(p_internalDb),
+   mp_enquire(nullptr),
+   m_query(query)
+{
+}
+
+Search::Search(Search&& s) = default;
+Search& Search::operator=(Search&& s) = default;
+Search::~Search() = default;
+
+Query::Query(const std::string& query) :
+  m_query(query)
+{}
+
+Query& Query::setQuery(const std::string& query) {
+    m_query = query;
+    return *this;
+}
+
+Query& Query::setGeorange(float latitude, float longitude, float distance) {
+    m_latitude = latitude;
+    m_longitude = longitude;
+    m_distance = distance;
+    m_geoquery = true;
+    return *this;
+}
+
+int Search::getEstimatedMatches() const
+{
+    try {
+      // Ask for an empty mset but make xapian check at least 10 documents:
+      // otherwise get_matches_estimated() may be wrong and return 0 even
+      // if we have results.
+      const auto estimateSet = getEnquire().get_mset(0, 0, 10);
+      return estimateSet.get_matches_estimated();
+    } catch(const Xapian::QueryParserError&) {
+      return 0;  // a query that cannot be parsed matches nothing
+    }
+}
+
+const SearchResultSet Search::getResults(int start, int maxResults) const {
+    try {
+      // Fetch at most maxResults matches, beginning at rank `start`.
+      auto matches = getEnquire().get_mset(start, maxResults);
+      return SearchResultSet(mp_internalDb, std::move(matches));
+    } catch(const Xapian::QueryParserError&) {
+      return SearchResultSet(mp_internalDb);  // unparsable query: empty result set
+    }
+}
+
+Xapian::Enquire& Search::getEnquire() const
+{
+    // The enquire object is built on first use, then cached in mp_enquire.
+    if ( !mp_enquire ) {
+        std::unique_ptr<Xapian::Enquire> fresh(new Xapian::Enquire(mp_internalDb->m_database));
+
+        const auto parsed = mp_internalDb->parseQuery(m_query);
+        if (mp_internalDb->m_verbose) {
+            std::cout << "Parsed query '" << m_query.m_query << "' to " << parsed.get_description() << std::endl;
+        }
+        fresh->set_query(parsed);
+
+        // Cache only once fully configured: if parsing throws, nothing is stored.
+        mp_enquire = std::move(fresh);
+    }
+    return *mp_enquire;
+}
+
+
+SearchResultSet::SearchResultSet(std::shared_ptr<InternalDataBase> p_internalDb, Xapian::MSet&& mset) :
+  mp_internalDb(p_internalDb),  // database the matches come from
+  mp_mset(std::make_shared<Xapian::MSet>(std::move(mset)))  // a named rvalue reference is an lvalue: move it explicitly
+{}
+
+SearchResultSet::SearchResultSet(std::shared_ptr<InternalDataBase> p_internalDb) :
+  mp_internalDb(p_internalDb),
+  mp_mset(nullptr)
+{}
+
+int SearchResultSet::size() const
+{
+  if (! mp_mset) {
+      return 0;
+  }
+  return mp_mset->size();
+}
+
+SearchResultSet::iterator SearchResultSet::begin() const
+{
+    if ( ! mp_mset ) {
+        return nullptr;
+    }
+    return new SearchIterator::InternalData(mp_internalDb, mp_mset, mp_mset->begin());
+}
+
+SearchResultSet::iterator SearchResultSet::end() const
+{
+    if ( ! mp_mset ) {
+        return nullptr;
+    }
+    return new SearchIterator::InternalData(mp_internalDb, mp_mset, mp_mset->end());
+}
+
+} //namespace zim
diff --git a/src/search_internal.h b/src/search_internal.h
new file mode 100644
index 0000000..c9af919
--- /dev/null
+++ b/src/search_internal.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2021 Manneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SEARCH_INTERNAL_H
+#define ZIM_SEARCH_INTERNAL_H
+
+#include <xapian.h>
+
+#include <zim/entry.h>
+#include <zim/error.h>
+
+namespace zim {
+
+/**
+ * A class to encapsulate a xapian database and all the information we can gather from it.
+ */
+class InternalDataBase {
+  public: // methods
+    InternalDataBase(const std::vector<Archive>& archives, bool verbose);
+    bool hasDatabase() const;   // true if at least one xapian database was added
+    bool hasValuesmap() const;  // true if the valuesmap is not empty
+    bool hasValue(const std::string& valueName) const;  // true if valueName is in the valuesmap
+    int  valueSlot(const std::string&  valueName) const;  // throws std::out_of_range if unknown
+
+    Xapian::Query parseQuery(const Query& query);
+
+  public: // data
+    // The (main) database we will search on (wrapping other xapian databases).
+    Xapian::Database m_database;
+
+    // The real databases.
+    std::vector<Xapian::Database> m_xapianDatabases;
+
+    // The archives we are searching on (one per real database, same order).
+    std::vector<Archive> m_archives;
+
+    // The valuesmap associated with the database.
+    std::map<std::string, int> m_valuesmap;
+
+    // If the database is open for suggestion (true even if the database has no
+    // newSuggestionformat). The constructor never sets it, so default it here.
+    bool m_suggestionMode = false;
+
+    // The query parser corresponding to the database.
+    Xapian::QueryParser m_queryParser;
+
+    // The stemmer used to parse queries.
+    Xapian::Stem m_stemmer;
+
+    // Verbosity of operations.
+    bool m_verbose;
+};
+
+struct SearchIterator::InternalData { // State behind a SearchIterator: a position in an MSet plus lazily built caches.
+    std::shared_ptr<InternalDataBase> mp_internalDb;
+    std::shared_ptr<Xapian::MSet> mp_mset;
+    Xapian::MSetIterator iterator;
+    Xapian::Document _document;       // cache filled by get_document()
+    bool document_fetched;            // whether _document is valid for the current position
+    std::unique_ptr<Entry> _entry;    // cache filled by get_entry()
+
+    InternalData(const InternalData& other) :
+      mp_internalDb(other.mp_internalDb),
+      mp_mset(other.mp_mset),
+      iterator(other.iterator),
+      _document(other._document),
+      document_fetched(other.document_fetched),
+      _entry(other._entry ? new Entry(*other._entry) : nullptr ) // the cached entry is copied, not shared
+    {
+    }
+
+    InternalData& operator=(const InternalData& other)
+    {
+      if (this != &other) { // self-assignment is a no-op
+        mp_internalDb = other.mp_internalDb;
+        mp_mset = other.mp_mset;
+        iterator = other.iterator;
+        _document = other._document;
+        document_fetched = other.document_fetched;
+        _entry.reset(other._entry ? new Entry(*other._entry) : nullptr);
+      }
+      return *this;
+    }
+
+    InternalData(std::shared_ptr<InternalDataBase> p_internalDb, std::shared_ptr<Xapian::MSet> p_mset, Xapian::MSetIterator iterator) :
+        mp_internalDb(p_internalDb),
+        mp_mset(p_mset),
+        iterator(iterator),
+        document_fetched(false) // nothing cached yet: _document and _entry are filled on demand
+    {};
+
+    Xapian::Document get_document() { // fetches the document once, then returns the cached copy
+        if ( !document_fetched ) {
+            if (iterator == mp_mset->end()) {
+                throw std::runtime_error("Cannot get entry for end iterator");
+            }
+            _document = iterator.get_document();
+            document_fetched = true;
+        }
+        return _document;
+    }
+
+    int get_databasenumber() { // index into m_archives of the archive the current match belongs to
+        Xapian::docid docid = *iterator;
+        return (docid - 1) % mp_internalDb->m_archives.size(); // presumably relies on xapian interleaving docids across sub-databases -- confirm
+    }
+
+    Entry& get_entry() { // resolves the match to an archive entry on first call, then caches it
+        if ( !_entry ) {
+            int databasenumber = get_databasenumber();
+            auto archive = mp_internalDb->m_archives.at(databasenumber);
+            _entry.reset(new Entry(archive.getEntryByPath(get_document().get_data()))); // the document data is used as the entry path
+        }
+        return *_entry.get();
+    }
+
+    bool operator==(const InternalData& other) const { // the caches do not take part in the comparison
+        return (mp_internalDb == other.mp_internalDb
+            &&  mp_mset == other.mp_mset
+            &&  iterator == other.iterator);
+    }
+};
+
+
+
+}; //namespace zim
+
+#endif //ZIM_SEARCH_INTERNAL_H
diff --git a/src/search_iterator.cpp b/src/search_iterator.cpp
new file mode 100644
index 0000000..0c5cbca
--- /dev/null
+++ b/src/search_iterator.cpp
@@ -0,0 +1,245 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+
+#include "xapian/myhtmlparse.h"
+#include <zim/search_iterator.h>
+#include <zim/search.h>
+#include <zim/archive.h>
+#include <zim/item.h>
+#include "search_internal.h"
+
+namespace zim {
+
+
+SearchIterator::~SearchIterator() = default;
+SearchIterator::SearchIterator(SearchIterator&& it) = default;
+SearchIterator& SearchIterator::operator=(SearchIterator&& it) = default;
+
+SearchIterator::SearchIterator() : SearchIterator(nullptr)
+{};
+
+SearchIterator::SearchIterator(InternalData* internal_data)
+  : internal(internal_data)
+{}
+
+SearchIterator::SearchIterator(const SearchIterator& it)
+    : internal(nullptr)
+{
+    if (it.internal) internal = std::unique_ptr<InternalData>(new InternalData(*it.internal));
+}
+
+SearchIterator & SearchIterator::operator=(const SearchIterator& it) {
+    if ( ! it.internal ) internal.reset();
+    else if ( ! internal ) internal = std::unique_ptr<InternalData>(new InternalData(*it.internal));
+    else *internal = *it.internal;
+
+    return *this;
+}
+
+bool SearchIterator::operator==(const SearchIterator& it) const {
+    if ( ! internal && ! it.internal) {
+        return true;
+    }
+    if ( ! internal || ! it.internal) {
+        return false;
+    }
+    return (*internal == *it.internal);
+}
+
+bool SearchIterator::operator!=(const SearchIterator& it) const {
+    return ! (*this == it);
+}
+
+SearchIterator& SearchIterator::operator++() {
+    if ( ! internal ) {
+        return *this;
+    }
+    ++(internal->iterator);
+    internal->document_fetched = false;
+    internal->_entry.reset();
+    return *this;
+}
+
+SearchIterator SearchIterator::operator++(int) {
+    SearchIterator it = *this;
+    operator++();
+    return it;
+}
+
+SearchIterator& SearchIterator::operator--() {
+    if ( ! internal ) {
+        return *this;
+    }
+    --(internal->iterator);
+    internal->document_fetched = false;
+    internal->_entry.reset();
+    return *this;
+}
+
+SearchIterator SearchIterator::operator--(int) {
+    SearchIterator it = *this;
+    operator--();
+    return it;
+}
+
+std::string SearchIterator::getPath() const {
+    if ( ! internal ) {
+        return "";
+    }
+
+    auto& db = *internal->mp_internalDb;
+    std::string storedPath = internal->get_document().get_data();
+    const bool newScheme = db.m_archives.at(getFileIndex()).hasNewNamespaceScheme();
+
+    // A database without "data" metadata is treated as storing `fullPath`.
+    const std::string declaredType = db.m_database.get_metadata("data");
+    const bool storesFullPath = declaredType.empty() || declaredType == "fullPath";
+
+    // With the new namespace scheme, a `fullPath` value is returned without
+    // its namespace, i.e. without its first two characters.
+    if (newScheme && storesFullPath) {
+        return storedPath.substr(2);
+    }
+    return storedPath;
+}
+
+std::string SearchIterator::getDbData() const {
+    if ( ! internal ) {
+        return "";
+    }
+
+    return internal->get_document().get_data();
+}
+
+std::string SearchIterator::getTitle() const {
+    if ( ! internal ) {
+        return "";
+    }
+    return internal->get_entry().getTitle();
+}
+
+int SearchIterator::getScore() const {
+    if ( ! internal ) {
+        return 0;
+    }
+    return internal->iterator.get_percent();
+}
+
+std::string SearchIterator::getSnippet() const {
+    if ( ! internal ) {
+        return "";
+    }
+
+    // Prefer a snippet stored in the database; otherwise generate one below.
+    if ( ! internal->mp_internalDb->hasValuesmap() )
+    {
+        /* Legacy database without a valuesmap: guess that slot 1 holds the snippet */
+        std::string stored_snippet = internal->get_document().get_value(1);
+        if ( ! stored_snippet.empty() )
+            return stored_snippet;
+        /* Let's continue here, and see if we can generate one */
+    }
+    else if ( internal->mp_internalDb->hasValue("snippet") )
+    {
+        return internal->get_document().get_value(internal->mp_internalDb->valueSlot("snippet"));
+    }
+    /* No stored snippet: build one from the item content; any failure gives "" */
+    try {
+        Entry& entry = internal->get_entry();
+        /* Get the content of the item to generate a snippet.
+           We parse it and use the parser's dump, presumably so that html tags are
+           removed from the content and the text can be cut nicely at any place. */
+        zim::MyHtmlParser htmlParser;
+        std::string content = entry.getItem().getData();
+        try {
+          htmlParser.parse_html(content, "UTF-8", true);
+        } catch (...) {} // best effort: use whatever was dumped before the failure
+        return internal->mp_mset->snippet(htmlParser.dump,
+                                          /*length=*/500,
+                                          /*stemmer=*/internal->mp_internalDb->m_stemmer,
+                                          /*flags=*/0);
+    } catch (...) {
+      return "";
+    }
+}
+
+int SearchIterator::getSize() const {
+    if ( ! internal ) {
+        return -1;
+    }
+    const auto& db = *internal->mp_internalDb;
+    if ( ! db.hasValuesmap() ) {
+        /* Legacy database without a valuesmap: guess that slot 2 holds the size. */
+        const std::string legacySize = internal->get_document().get_value(2);
+        return legacySize.empty() ? -1 : atoi(legacySize.c_str());
+    }
+    if ( db.hasValue("size") ) {
+        return atoi(internal->get_document().get_value(db.valueSlot("size")).c_str());
+    }
+    /* No size stored. The size is never used anyway: is it really worth
+       getting the content to calculate it? */
+    return -1;
+}
+
+int SearchIterator::getWordCount() const {
+    if ( ! internal ) {
+        return -1;
+    }
+    const auto& db = *internal->mp_internalDb;
+    if ( ! db.hasValuesmap() ) {
+        /* Legacy database without a valuesmap: guess that slot 3 holds the word count. */
+        const std::string legacyCount = internal->get_document().get_value(3);
+        return legacyCount.empty() ? -1 : atoi(legacyCount.c_str());
+    }
+    if ( db.hasValue("wordcount") ) {
+        return atoi(internal->get_document().get_value(db.valueSlot("wordcount")).c_str());
+    }
+    return -1; /* the valuesmap has no "wordcount" entry */
+}
+
+int SearchIterator::getFileIndex() const {
+    if ( internal ) {
+        return internal->get_databasenumber();
+    }
+    return 0;
+}
+
+Uuid SearchIterator::getZimId() const {
+    if (! internal ) {
+        throw std::runtime_error("Cannot get zimId from uninitialized iterator");
+    }
+    return internal->mp_internalDb->m_archives.at(getFileIndex()).getUuid();
+}
+
+SearchIterator::reference SearchIterator::operator*() const {
+    if (! internal ) {
+        throw std::runtime_error("Cannot get a entry for a uninitialized iterator");
+    }
+    return internal->get_entry();
+}
+
+SearchIterator::pointer SearchIterator::operator->() const {
+    return &**this;
+}
+
+
+} // namespace zim
diff --git a/src/suggestion.cpp b/src/suggestion.cpp
new file mode 100644
index 0000000..ab4dc6e
--- /dev/null
+++ b/src/suggestion.cpp
@@ -0,0 +1,336 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+
+#include <zim/suggestion.h>
+#include <zim/item.h>
+#include "suggestion_internal.h"
+#include <iostream>
+#include "fileimpl.h"
+#include "tools.h"
+#include "constants.h"
+
+#if defined(ENABLE_XAPIAN)
+#include <unicode/locid.h>
+#endif  // ENABLE_XAPIAN
+
+namespace zim
+{
+
+// Stores the archive and verbosity; when xapian support is compiled in, also
+// runs initXapianDb() to look for the archive's title index.
+SuggestionDataBase::SuggestionDataBase(const Archive& archive, bool verbose)
+  : m_archive(archive), m_verbose(verbose)
+{
+#if defined(ENABLE_XAPIAN)
+  initXapianDb();
+#endif  // ENABLE_XAPIAN
+}
+
+#if defined(ENABLE_XAPIAN)
+// Looks for the title index of the archive and, when it can be opened, loads
+// its valuesmap and stemmer. Every early return leaves m_database unassigned.
+void SuggestionDataBase::initXapianDb() {
+  // NOTE(review): the parser receives m_database here, before the opened
+  // database is assigned to it at the end of this function - confirm intended.
+  m_queryParser.set_database(m_database);
+  m_queryParser.set_default_op(Xapian::Query::op::OP_AND);
+
+  // Locate the entry holding the title index; without it there is nothing to open.
+  auto impl = m_archive.getImpl();
+  auto found = impl->findx('X', "title/xapian");
+  if (!found.first) { return; }
+
+  // NOTE(review): presumably a zero second member means no direct access - confirm.
+  auto accessInfo = Entry(impl, entry_index_type(found.second)).getItem().getDirectAccessInformation();
+  if (accessInfo.second == 0) { return; }
+
+  // Try to obtain the database from the access information (false on failure).
+  Xapian::Database titleDb;
+  if (!getDbFromAccessInfo(accessInfo, titleDb)) { return; }
+
+  // Value name -> slot mapping stored in the database metadata.
+  m_valuesmap = read_valuesmap(titleDb.get_metadata("valuesmap"));
+  auto language = titleDb.get_metadata("language");
+  if (language.empty()) {
+      // Databases created before 2017/03 have no language metadata.
+      // Their terms were stemmed anyway, so the search query has to be
+      // stemmed the same way the database was created.
+      // So we need a language: use the one of the zim file.
+      // If the zim file has no language metadata either, we cannot do more.
+      try {
+          language = m_archive.getMetadata("Language");
+      } catch(...) {}
+  }
+  if (!language.empty()) {
+      /* Configure language-based stemming. */
+      icu::Locale languageLocale(language.c_str());
+      try {
+          m_stemmer = Xapian::Stem(languageLocale.getLanguage());
+          m_queryParser.set_stemmer(m_stemmer);
+      } catch (...) {
+          std::cout << "No stemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
+      }
+  }
+
+  // Expose the database only now that everything else is configured.
+  m_database = titleDb;
+}
+
+// True when m_database holds at least one underlying database.
+bool SuggestionDataBase::hasDatabase() const {
+  return m_database.internal.empty() == false;
+}
+
+// True when the valuesmap read from the database has at least one entry.
+bool SuggestionDataBase::hasValuesmap() const {
+  return m_valuesmap.empty() == false;
+}
+
+// True when `valueName` is one of the keys of the valuesmap.
+bool SuggestionDataBase::hasValue(const std::string& valueName) const {
+  return m_valuesmap.count(valueName) != 0;
+}
+
+// Slot mapped to `valueName`; std::map::at throws std::out_of_range if absent.
+int SuggestionDataBase::valueSlot(const std::string& valueName) const {
+  return m_valuesmap.at(valueName);
+}
+
+/*
+ * subquery_phrase: selects documents that have the terms in the order of the query
+ * within a specified window.
+ * subquery_anchored: selects documents that have the terms in the order of the
+ * query within a specified window and starts from the beginning of the document.
+ * subquery_and: selects documents that have all the terms in the query.
+ *
+ * subquery_phrase and subquery_anchored by themselves are quite exclusive. To
+ * include more "similar" docs, we combine them with subquery_and using OP_OR
+ * operator. If a particular document has a weight of A in subquery_and and B
+ * in subquery_phrase and C in subquery_anchored, the net weight of that document
+ * becomes A+B+C (normalised out of 100). So the documents closer to the query
+ * get a higher relevance.
+ */
+Xapian::Query SuggestionDataBase::parseQuery(const std::string& query)
+{
+  // m_queryParser is reconfigured below, so serialize concurrent callers.
+  std::lock_guard<std::mutex> locker(m_mutex);
+
+  // Normal parsing: stemming enabled, partial matching requested.
+  m_queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
+  const auto flags = Xapian::QueryParser::FLAG_DEFAULT | Xapian::QueryParser::FLAG_PARTIAL;
+  Xapian::Query xquery = m_queryParser.parse_query(query, flags);
+  if (query.empty()) {
+    // Nothing to build phrase variants from.
+    return xquery;
+  }
+
+  // The phrase variants are parsed without stemming.
+  m_queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_NONE);
+
+  // Each variant is rebuilt as an OP_PHRASE whose window equals its number of terms.
+  Xapian::Query phrase = m_queryParser.parse_query(query);
+  phrase = Xapian::Query(Xapian::Query::OP_PHRASE, phrase.get_terms_begin(), phrase.get_terms_end(), phrase.get_length());
+
+  // Same query prefixed with ANCHOR_TERM.
+  Xapian::Query anchored = m_queryParser.parse_query(ANCHOR_TERM + query);
+  anchored = Xapian::Query(Xapian::Query::OP_PHRASE, anchored.get_terms_begin(), anchored.get_terms_end(), anchored.get_length());
+
+  // Combine the three queries with OP_OR.
+  xquery = Xapian::Query(Xapian::Query::OP_OR, xquery, phrase);
+  return Xapian::Query(Xapian::Query::OP_OR, xquery, anchored);
+}
+
+#endif  // ENABLE_XAPIAN
+
+// The suggestion database is not created here but on the first suggest() call.
+SuggestionSearcher::SuggestionSearcher(const Archive& archive)
+  : mp_internalDb(nullptr),
+    m_archive(archive), m_verbose(false)
+{}
+
+SuggestionSearcher::SuggestionSearcher(const SuggestionSearcher& other) = default;  // memberwise; a copy shares mp_internalDb with the original
+SuggestionSearcher& SuggestionSearcher::operator=(const SuggestionSearcher& other) = default;
+SuggestionSearcher::SuggestionSearcher(SuggestionSearcher&& other) = default;
+SuggestionSearcher& SuggestionSearcher::operator=(SuggestionSearcher&& other) = default;
+SuggestionSearcher::~SuggestionSearcher() = default;
+
+// Creates a search for `query`. The suggestion database is built on the first
+// call and then shared by every search created afterwards.
+SuggestionSearch SuggestionSearcher::suggest(const std::string& query)
+{
+  if (mp_internalDb == nullptr) { initDatabase(); }
+  return SuggestionSearch(mp_internalDb, query);
+}
+
+// The flag is handed to the database when initDatabase() creates it.
+void SuggestionSearcher::setVerbose(bool verbose) {
+  this->m_verbose = verbose;
+}
+
+// Creates the shared suggestion database with the current verbosity.
+void SuggestionSearcher::initDatabase() {
+    mp_internalDb = std::make_shared<SuggestionDataBase>(m_archive, m_verbose);
+}
+
+// Stores the database and the query; the xapian enquire is built later by getEnquire().
+SuggestionSearch::SuggestionSearch(std::shared_ptr<SuggestionDataBase> p_internalDb, const std::string& query)
+ : mp_internalDb(p_internalDb), m_query(query)
+#if defined(ENABLE_XAPIAN)
+   , mp_enquire(nullptr)
+#endif  // ENABLE_XAPIAN
+{}
+
+SuggestionSearch::SuggestionSearch(SuggestionSearch&& s) = default;  // only the move operations and the destructor are defined in this file
+SuggestionSearch& SuggestionSearch::operator=(SuggestionSearch&& s) = default;
+SuggestionSearch::~SuggestionSearch() = default;
+
+// Estimated number of suggestions for the query: taken from the title index
+// when it can be queried, from the size of the findByTitle() range otherwise.
+int SuggestionSearch::getEstimatedMatches() const
+{
+#if defined(ENABLE_XAPIAN)
+  if (mp_internalDb->hasDatabase()) {
+    try {
+      // Ask for an empty mset but make xapian check at least 10 documents;
+      // otherwise the estimate may wrongly be 0 although results exist.
+      return getEnquire().get_mset(0, 0, 10).get_matches_estimated();
+    } catch(...) {
+      std::cerr << "Query Parsing failed, Switching to search without index." << std::endl;
+    }
+  }
+#endif  // ENABLE_XAPIAN
+
+  return mp_internalDb->m_archive.findByTitle(m_query).size();
+}
+
+// Returns up to `maxResults` suggestions starting at `start`, from the title
+// index when it can be queried and from the findByTitle() range otherwise.
+const SuggestionResultSet SuggestionSearch::getResults(int start, int maxResults) const {
+#if defined(ENABLE_XAPIAN)
+    if (mp_internalDb->hasDatabase()) {
+      try {
+        auto mset = getEnquire().get_mset(start, maxResults);
+        return SuggestionResultSet(mp_internalDb, std::move(mset));
+      } catch(...) {
+        std::cerr << "Query Parsing failed, Switching to search without index." << std::endl;
+      }
+    }
+#endif  // ENABLE_XAPIAN
+
+    // Use the range returned by offset(): calling it and dropping the result
+    // would leave start/maxResults without any effect on the fallback.
+    return SuggestionResultSet(mp_internalDb->m_archive.findByTitle(m_query).offset(start, maxResults));
+}
+
+const void SuggestionSearch::forceRangeSuggestion() {  // closes the database reached through the shared mp_internalDb
+#if defined(ENABLE_XAPIAN)
+  mp_internalDb->m_database.close();  // presumably makes later index queries throw so that the range fallback is used - TODO confirm
+#endif  // ENABLE_XAPIAN
+}
+
+#if defined(ENABLE_XAPIAN)
+// Builds the xapian enquire object for m_query on first use and caches it.
+// The cache is filled only after the enquire is fully configured.
+Xapian::Enquire& SuggestionSearch::getEnquire() const
+{
+    if ( ! mp_enquire ) {
+        std::unique_ptr<Xapian::Enquire> fresh(new Xapian::Enquire(mp_internalDb->m_database));
+
+        const auto unaccentedQuery = removeAccents(m_query);
+        const auto parsed = mp_internalDb->parseQuery(unaccentedQuery);
+        if (mp_internalDb->m_verbose) {
+            std::cout << "Parsed query '" << unaccentedQuery << "' to " << parsed.get_description() << std::endl;
+        }
+        fresh->set_query(parsed);
+
+        /*
+         * Suggestions are searched in a separate title index, for which the
+         * default BM25 parameters are not adapted:
+         *  - k1 (WDF factor) = 0.001 reduces the effect of a word being
+         *    repeated within a document;
+         *  - b = 1 normalises document length, since BM25 otherwise gives
+         *    smaller documents larger weights.
+         * Results are sorted by relevance, then by title value, so that the
+         * suggestions are closer to the search string.
+         * See https://xapian.org/docs/apidoc/html/classXapian_1_1BM25Weight.html
+         */
+        fresh->set_weighting_scheme(Xapian::BM25Weight(0.001,0,1,1,0.5));
+        if (mp_internalDb->hasValue("title")) {
+            fresh->set_sort_by_relevance_then_value(mp_internalDb->valueSlot("title"), false);
+        }
+        if (mp_internalDb->hasValue("targetPath")) {
+            // Collapse on the targetPath value slot.
+            fresh->set_collapse_key(mp_internalDb->valueSlot("targetPath"));
+        }
+        mp_enquire = std::move(fresh);
+    }
+    return *mp_enquire;
+}
+
+// Result set backed by an index query; the entry range stays unset in this mode.
+SuggestionResultSet::SuggestionResultSet(std::shared_ptr<SuggestionDataBase> p_internalDb, Xapian::MSet&& mset) :
+  mp_internalDb(p_internalDb), mp_entryRange(nullptr),
+  mp_mset(std::make_shared<Xapian::MSet>(std::move(mset)))  // a named rvalue reference is an lvalue: move it explicitly
+{}
+#endif  // ENABLE_XAPIAN
+
+// Result set backed by a copy of `entryRange`; no database or mset is attached.
+SuggestionResultSet::SuggestionResultSet(EntryRange entryRange) :
+  mp_internalDb(nullptr), mp_entryRange(new EntryRange(entryRange))
+#if defined(ENABLE_XAPIAN)
+  , mp_mset(nullptr)
+#endif  // ENABLE_XAPIAN
+{}
+
+// Number of results held: taken from the mset when no entry range is set,
+// from the entry range otherwise.
+int SuggestionResultSet::size() const
+{
+#if defined(ENABLE_XAPIAN)
+  if (mp_entryRange == nullptr) { return mp_mset->size(); }
+#endif  // ENABLE_XAPIAN
+
+  return mp_entryRange->size();
+}
+
+// Iterator on the first result. Without an entry range it is built from a
+// new SuggestionInternalData positioned at the beginning of the mset.
+SuggestionResultSet::iterator SuggestionResultSet::begin() const
+{
+#if defined(ENABLE_XAPIAN)
+    if (mp_entryRange == nullptr)
+        return iterator(new iterator::SuggestionInternalData(mp_internalDb, mp_mset, mp_mset->begin()));
+#endif  // ENABLE_XAPIAN
+    return iterator(mp_entryRange->begin());
+}
+
+// Past-the-end iterator. Without an entry range it is built from a new
+// SuggestionInternalData positioned at the end of the mset.
+SuggestionResultSet::iterator SuggestionResultSet::end() const
+{
+#if defined(ENABLE_XAPIAN)
+    if (mp_entryRange == nullptr)
+        return iterator(new iterator::SuggestionInternalData(mp_internalDb, mp_mset, mp_mset->end()));
+#endif  // ENABLE_XAPIAN
+    return iterator(mp_entryRange->end());
+}
+
+} // namespace zim
diff --git a/src/suggestion_internal.h b/src/suggestion_internal.h
new file mode 100644
index 0000000..fa338fc
--- /dev/null
+++ b/src/suggestion_internal.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_SUGGESTION_INTERNAL_H
+#define ZIM_SUGGESTION_INTERNAL_H
+
+#include "zim/suggestion.h"
+#include "zim/archive.h"
+
+#include <stdexcept>
+#include <mutex>
+
+#if defined(LIBZIM_WITH_XAPIAN)
+#include <xapian.h>
+#endif
+
+namespace zim
+{
+
+/**
+ * A class to encapsulate a xapian title index and its archive and all the
+ * information we can gather from it.
+ */
+class SuggestionDataBase {
+  public: // methods
+    SuggestionDataBase(const Archive& archive, bool verbose);
+
+  public: // data
+    // The archive to get suggestions from.
+    Archive m_archive;
+
+    // Verbosity of operations.
+    bool m_verbose;
+
+  private: // data
+    std::mutex m_mutex;  // locked by parseQuery(), which reconfigures m_queryParser on every call
+
+#if defined(LIBZIM_WITH_XAPIAN)  // NOTE(review): suggestion.cpp tests ENABLE_XAPIAN instead - confirm both are always defined together
+
+  public: // xapian based methods
+    bool hasDatabase() const;  // true when m_database holds an opened database
+    bool hasValuesmap() const;  // true when m_valuesmap is not empty
+    bool hasValue(const std::string& valueName) const;  // true when valueName is a key of m_valuesmap
+    int  valueSlot(const std::string&  valueName) const;  // slot mapped to valueName; throws if it is not in m_valuesmap
+
+    Xapian::Query parseQuery(const std::string& query);  // builds the xapian query used for suggestions
+
+  public: // xapian based data
+    // The Xapian database we will search on.
+    Xapian::Database m_database;
+
+    // The valuesmap associated with the database.
+    std::map<std::string, int> m_valuesmap;
+
+    // The query parser corresponding to the database.
+    Xapian::QueryParser m_queryParser;
+
+    // The stemmer used to parse queries
+    Xapian::Stem m_stemmer;
+
+  private:
+    void initXapianDb();  // called by the constructor; fills the xapian members above
+#endif  // LIBZIM_WITH_XAPIAN
+};
+
+#if defined(LIBZIM_WITH_XAPIAN)
+struct SuggestionIterator::SuggestionInternalData {
+    std::shared_ptr<SuggestionDataBase> mp_internalDb;  // database the results come from
+    std::shared_ptr<Xapian::MSet> mp_mset;              // result set being iterated
+    Xapian::MSetIterator iterator;                      // current position in *mp_mset
+    Xapian::Document _document;                         // cache, meaningful only if document_fetched
+    bool document_fetched;
+    std::unique_ptr<Entry> _entry;                      // cache filled by get_entry()
+
+    // Copies share the database and the mset; the cached entry is duplicated.
+    SuggestionInternalData(const SuggestionInternalData& other) :
+      mp_internalDb(other.mp_internalDb),
+      mp_mset(other.mp_mset),
+      iterator(other.iterator),
+      _document(other._document),
+      document_fetched(other.document_fetched),
+      _entry(other._entry ? new Entry(*other._entry) : nullptr)
+    {}
+
+    SuggestionInternalData& operator=(const SuggestionInternalData& other) {
+      if (this != &other) {
+        mp_internalDb = other.mp_internalDb;
+        mp_mset = other.mp_mset;
+        iterator = other.iterator;
+        _document = other._document;
+        document_fetched = other.document_fetched;
+        _entry.reset(other._entry ? new Entry(*other._entry) : nullptr);
+      }
+      return *this;
+    }
+
+    SuggestionInternalData(std::shared_ptr<SuggestionDataBase> p_internalDb, std::shared_ptr<Xapian::MSet> p_mset, Xapian::MSetIterator iterator) :
+        mp_internalDb(p_internalDb),
+        mp_mset(p_mset),
+        iterator(iterator),
+        document_fetched(false)
+    {}  // both caches start empty
+
+    // Document at the current position, fetched once and then cached. Throws std::runtime_error at the end of the mset.
+    Xapian::Document get_document() {
+        if ( document_fetched ) { return _document; }
+        if (iterator == mp_mset->end()) {
+            throw std::runtime_error("Cannot get entry for end iterator");
+        }
+        _document = iterator.get_document();
+        document_fetched = true;
+        return _document;
+    }
+
+    // Archive entry whose path is the document data; looked up once and cached.
+    Entry& get_entry() {
+        if (_entry == nullptr) {
+            _entry.reset(new Entry(mp_internalDb->m_archive.getEntryByPath(get_document().get_data())));
+        }
+        return *_entry;
+    }
+
+    bool operator==(const SuggestionInternalData& other) const {
+        return (mp_internalDb == other.mp_internalDb
+            &&  mp_mset == other.mp_mset
+            &&  iterator == other.iterator);
+    }
+};
+#endif  // LIBZIM_WITH_XAPIAN
+
+}
+
+#endif // ZIM_SUGGESTION_INTERNAL_H
diff --git a/src/suggestion_iterator.cpp b/src/suggestion_iterator.cpp
new file mode 100644
index 0000000..4a2be50
--- /dev/null
+++ b/src/suggestion_iterator.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+
+#include "zim/suggestion_iterator.h"
+#include "suggestion_internal.h"
+#include <stdexcept>
+
+namespace zim
+{
+
+SuggestionIterator::~SuggestionIterator() = default;  // destructor and move operations are compiler-generated; the copy operations are hand-written below
+SuggestionIterator::SuggestionIterator(SuggestionIterator&& it) = default;
+SuggestionIterator& SuggestionIterator::operator=(SuggestionIterator&& it) = default;
+
+// Range-backed iterator: holds a copy of `rangeIterator` and no xapian data.
+SuggestionIterator::SuggestionIterator(RangeIterator rangeIterator) : mp_rangeIterator(new RangeIterator(rangeIterator))
+#if defined(LIBZIM_WITH_XAPIAN)
+    , mp_internal(nullptr)
+#endif  // LIBZIM_WITH_XAPIAN
+{}
+
+#if defined(LIBZIM_WITH_XAPIAN)
+// Index-backed iterator: takes ownership of `internal` (held by mp_internal).
+SuggestionIterator::SuggestionIterator(SuggestionInternalData* internal)
+  : mp_rangeIterator(nullptr), mp_internal(internal)
+{}
+#endif  // LIBZIM_WITH_XAPIAN
+
+// Deep-copies whichever backend `it` uses. The cached SuggestionItem is not
+// copied; operator*() rebuilds it on demand.
+SuggestionIterator::SuggestionIterator(const SuggestionIterator& it)
+    : mp_rangeIterator(nullptr)
+{
+    if (it.mp_rangeIterator) {
+        mp_rangeIterator.reset(new RangeIterator(*it.mp_rangeIterator));
+    }
+#if defined(LIBZIM_WITH_XAPIAN)
+    if (it.mp_internal) {
+        mp_internal.reset(new SuggestionInternalData(*it.mp_internal));
+    }
+#endif  // LIBZIM_WITH_XAPIAN
+}
+
+// Copy-assigns by deep-copying the backend of `it` and dropping the cached
+// SuggestionItem. Self-assignment is a no-op: without the guard the resets
+// below would destroy the very state that is about to be copied.
+SuggestionIterator& SuggestionIterator::operator=(const SuggestionIterator& it) {
+    if (this == &it) {
+        return *this;
+    }
+    mp_rangeIterator.reset(it.mp_rangeIterator ? new RangeIterator(*it.mp_rangeIterator) : nullptr);
+
+#if defined(LIBZIM_WITH_XAPIAN)
+    mp_internal.reset(it.mp_internal ? new SuggestionInternalData(*it.mp_internal) : nullptr);
+#endif  // LIBZIM_WITH_XAPIAN
+
+    m_suggestionItem.reset();
+    return *this;
+}
+
+// Iterators are equal when both use the same kind of backend and those
+// backends compare equal; any other combination compares unequal.
+bool SuggestionIterator::operator==(const SuggestionIterator& it) const {
+    if (mp_rangeIterator && it.mp_rangeIterator) {
+        return *mp_rangeIterator == *it.mp_rangeIterator;
+    }
+#if defined(LIBZIM_WITH_XAPIAN)
+    if (mp_internal && it.mp_internal) {
+        return *mp_internal == *it.mp_internal;
+    }
+#endif  // LIBZIM_WITH_XAPIAN
+    return false;
+}
+
+// Negation of operator==().
+bool SuggestionIterator::operator!=(const SuggestionIterator& it) const
+{ return !operator==(it); }
+
+// Advances to the next suggestion and drops everything cached for the
+// previous position (entry, document and SuggestionItem).
+SuggestionIterator& SuggestionIterator::operator++() {
+#if defined(LIBZIM_WITH_XAPIAN)
+    if (mp_internal) {
+        auto& data = *mp_internal;
+        ++data.iterator;
+        data._entry.reset();
+        data.document_fetched = false;
+    }
+#endif  // LIBZIM_WITH_XAPIAN
+    if (mp_rangeIterator) { ++(*mp_rangeIterator); }
+    m_suggestionItem.reset();
+    return *this;
+}
+
+SuggestionIterator SuggestionIterator::operator++(int) {
+    SuggestionIterator previous(*this);  // state before advancing
+    ++(*this);
+    return previous;
+}
+
+// Steps back to the previous suggestion and drops everything cached for the
+// position being left (entry, document and SuggestionItem).
+SuggestionIterator& SuggestionIterator::operator--() {
+#if defined(LIBZIM_WITH_XAPIAN)
+    if (mp_internal) {
+        auto& data = *mp_internal;
+        --data.iterator;
+        data._entry.reset();
+        data.document_fetched = false;
+    }
+#endif  // LIBZIM_WITH_XAPIAN
+    if (mp_rangeIterator) { --(*mp_rangeIterator); }
+    m_suggestionItem.reset();
+    return *this;
+}
+
+SuggestionIterator SuggestionIterator::operator--(int) {
+    SuggestionIterator previous(*this);  // state before stepping back
+    --(*this);
+    return previous;
+}
+
+// Archive entry for the current suggestion, from whichever backend is set.
+// Throws std::runtime_error when the iterator has no backend.
+Entry SuggestionIterator::getEntry() const {
+#if defined(LIBZIM_WITH_XAPIAN)
+    if (mp_internal) { return mp_internal->get_entry(); }
+#endif  // LIBZIM_WITH_XAPIAN
+
+    if ( ! mp_rangeIterator ) {
+        throw std::runtime_error("Cannot dereference iterator");
+    }
+    return **mp_rangeIterator;
+}
+
+#if defined(LIBZIM_WITH_XAPIAN)
+// Data stored in the index document of the current suggestion; empty for
+// iterators that have no xapian data.
+std::string SuggestionIterator::getDbData() const {
+    return mp_internal
+         ? mp_internal->get_document().get_data()
+         : std::string();
+}
+
+// Path of the current suggestion as recorded in the index, stripped of its
+// namespace prefix when the archive uses the new namespace scheme. Empty for
+// iterators that have no xapian data.
+std::string SuggestionIterator::getIndexPath() const
+{
+    if (! mp_internal) {
+        return "";
+    }
+    auto& db = *mp_internal->mp_internalDb;
+    std::string path = mp_internal->get_document().get_data();
+    // Indexes without a "data" metadata are treated as storing full paths.
+    std::string dbDataType = db.m_database.get_metadata("data");
+    if (dbDataType.empty()) {
+        dbDataType = "fullPath";
+    }
+    // Drop the two leading characters (presumably namespace letter and '/').
+    if (db.m_archive.hasNewNamespaceScheme() && dbDataType == "fullPath") {
+        path = path.substr(2);
+    }
+    return path;
+}
+
+// Title of the entry behind the current suggestion. Empty when the iterator
+// has no xapian data or when getting the entry or its title throws.
+std::string SuggestionIterator::getIndexTitle() const {
+    std::string title;
+    if (mp_internal) {
+        try { title = mp_internal->get_entry().getTitle(); }
+        catch (...) { title.clear(); }
+    }
+    return title;
+}
+
+// Snippet generated by the mset from the suggestion title (length argument
+// 500, database stemmer); empty without xapian data or when that throws.
+std::string SuggestionIterator::getIndexSnippet() const {
+    if (mp_internal) {
+        try {
+            const auto& stemmer = mp_internal->mp_internalDb->m_stemmer;
+            return mp_internal->mp_mset->snippet(getIndexTitle(), 500, stemmer);
+        } catch(...) {}
+    }
+    return "";
+}
+#endif  // LIBZIM_WITH_XAPIAN
+
+// Returns the SuggestionItem for the current position, building and caching
+// it on first access (the cache is dropped whenever the iterator moves).
+// Throws std::runtime_error if the iterator has no backend.
+const SuggestionItem& SuggestionIterator::operator*() {
+    if (!m_suggestionItem) {
+#if defined(LIBZIM_WITH_XAPIAN)
+        if (mp_internal) {
+            // Index-backed: title, path and snippet come from the index.
+            m_suggestionItem.reset(new SuggestionItem(getIndexTitle(),
+                    getIndexPath(), getIndexSnippet()));
+        } else
+#endif  // LIBZIM_WITH_XAPIAN
+        if (mp_rangeIterator) {
+            // Range-backed: the item is built from title and path only.
+            m_suggestionItem.reset(new SuggestionItem((*mp_rangeIterator)->getTitle(),
+                                                    (*mp_rangeIterator)->getPath()));
+        }
+        if (!m_suggestionItem) {
+            throw std::runtime_error("Cannot dereference iterator");
+        }
+    }
+    return *m_suggestionItem;
+}
+
+// Pointer to the item produced (and cached) by operator*(); throws like it.
+const SuggestionItem* SuggestionIterator::operator->() {
+    return &operator*();
+}
+
+} // namespace zim
diff --git a/src/template.cpp b/src/template.cpp
new file mode 100644
index 0000000..75e4bb8
--- /dev/null
+++ b/src/template.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "template.h"
+
+namespace zim
+{
+  // Character-driven state machine recognising "<%token%>" and
+  // "<%/N/title%>" (N being a one-character namespace) inside plain data.
+  // Each state_* member consumes one character and selects the next state.
+
+  // Plain data: accumulate, remembering where a possible tag starts.
+  void TemplateParser::state_data(char ch)
+  {
+    data += ch;
+    if (ch != '<')
+      return;
+    save = data.size() - 1;
+    state = &TemplateParser::state_lt;
+  }
+
+  // After '<': only '%' continues a tag, anything else is plain data again.
+  // NOTE(review): a second '<' also falls back to data, so "<<%x%>" is not
+  // recognised as a token - confirm this is intended.
+  void TemplateParser::state_lt(char ch)
+  {
+    data += ch;
+    state = (ch == '%') ? &TemplateParser::state_token0
+                        : &TemplateParser::state_data;
+  }
+
+  // After "<%": a '/' announces a link, any other character starts a token.
+  void TemplateParser::state_token0(char ch)
+  {
+    data += ch;
+    if (ch == '/')
+    {
+      state = &TemplateParser::state_link0;
+      return;
+    }
+    token = data.size() - 1;
+    state = &TemplateParser::state_token;
+  }
+
+  // Inside a token: wait for the closing '%'.
+  void TemplateParser::state_token(char ch)
+  {
+    data += ch;
+    if (ch == '%') state = &TemplateParser::state_token_end;
+  }
+
+  // After the '%' closing a token: '>' completes "<%token%>" and reports the
+  // data preceding the tag followed by the token; any other character is
+  // appended as data. Both cases go back to state_data.
+  void TemplateParser::state_token_end(char ch)
+  {
+    if (ch != '>')
+      data += ch;
+    else if (event)
+    {
+      // data ends with the closing '%', which is not part of the token.
+      event->onData(data.substr(0, save));
+      event->onToken(data.substr(token, data.size() - token - 1));
+      data.clear();
+    }
+    state = &TemplateParser::state_data;
+  }
+
+  // After "<%/": the next character is the namespace of the link.
+  void TemplateParser::state_link0(char ch)
+  {
+    ns = ch;
+    data += ch;
+    state = &TemplateParser::state_link;
+  }
+
+  // After the namespace: a '/' starts the title, anything else aborts the link.
+  void TemplateParser::state_link(char ch)
+  {
+    data += ch;
+    if (ch != '/')
+    {
+      state = &TemplateParser::state_data;
+      return;
+    }
+    token = data.size();  // the title begins right after this '/'
+    state = &TemplateParser::state_title;
+  }
+
+  // Inside a link title: a '%' marks its end.
+  void TemplateParser::state_title(char ch)
+  {
+    data += ch;
+    if (ch != '%')
+      return;
+    token_e = data.size() - 1;
+    state = &TemplateParser::state_title_end;
+  }
+
+  // After the '%' closing a link title: a '>' reports the preceding data and
+  // the link, then restarts on empty data. Other characters are accumulated
+  // and the parser keeps waiting here for '>'.
+  void TemplateParser::state_title_end(char ch)
+  {
+    data += ch;
+    if (ch != '>')
+      return;
+
+    if (event)
+    {
+      event->onData(data.substr(0, save));
+      event->onLink(ns, data.substr(token, token_e - token));
+    }
+    data.clear();
+    state = &TemplateParser::state_data;
+  }
+
+  // Reports whatever data is pending (when an event sink is set), clears it
+  // and returns to the plain-data state.
+  void TemplateParser::flush()
+  {
+    if (event) event->onData(data);
+    data.clear();
+    state = &TemplateParser::state_data;
+  }
+}
diff --git a/src/template.h b/src/template.h
new file mode 100644
index 0000000..116be10
--- /dev/null
+++ b/src/template.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_TEMPLATE_H
+#define ZIM_TEMPLATE_H
+
+#include <string>
+
+namespace zim
+{
+  class TemplateParser
+  {
+    public:
+      class Event  // callbacks invoked while parsing
+      {
+        public:
+          virtual void onData(const std::string& data) = 0;    // plain text preceding a tag, or flushed text
+          virtual void onToken(const std::string& token) = 0;  // name found in "<%name%>"
+          virtual void onLink(char ns, const std::string& url) = 0;  // "<%/N/title%>": N and title
+          virtual ~Event() = default;
+      };
+
+    private:
+      Event* event;                    // may be null: then nothing is reported
+      std::string data;                // characters accumulated since the last clear
+      std::string::size_type save;     // index in data of the last '<' seen in plain data
+      std::string::size_type token;    // index in data where the token / link title starts
+      std::string::size_type token_e;  // index in data of the '%' ending a link title
+      char ns;                         // namespace character of the link being parsed
+      typedef void (TemplateParser::*state_type)(char);
+
+      state_type state;                // handler applied to the next character
+
+      void state_data(char ch);
+      void state_lt(char ch);
+      void state_token0(char ch);
+      void state_token(char ch);
+      void state_token_end(char ch);
+      void state_link0(char ch);
+      void state_link(char ch);
+      void state_title(char ch);
+      void state_title_end(char ch);
+
+    public:
+      // The positions and ns are zero-initialized so that no member is left indeterminate.
+      explicit TemplateParser(Event* ev)
+        : event(ev), save(0), token(0), token_e(0), ns('\0'),
+          state(&TemplateParser::state_data)
+        { }
+
+      // Feeds one character to the handler of the current state.
+      void parse(char ch)
+      { (this->*state)(ch); }
+
+      // Feeds every character of s, in order.
+      void parse(const std::string& s)
+      {
+        for (char ch : s)
+          parse(ch);
+      }
+
+      void flush();  // reports pending data and returns to the plain-data state
+  };
+}
+
+#endif // ZIM_TEMPLATE_H
diff --git a/src/tools.cpp b/src/tools.cpp
new file mode 100644
index 0000000..6142b2e
--- /dev/null
+++ b/src/tools.cpp
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2016-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2013-2016 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "tools.h"
+#include "fs.h"
+
+#include <sys/types.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <memory>
+#include <mutex>
+#include <stdexcept>
+#include <random>
+#include <errno.h>
+
+#ifdef _WIN32
+# include <windows.h>
+# include <direct.h>
+# include <io.h>
+# include <stringapiset.h>
+# define SEPARATOR "\\"
+#else
+# include <unistd.h>
+# define SEPARATOR "/"
+#endif
+
+#ifdef __MINGW32__
+# include <time.h>
+#else
+# include <thread>
+# include <chrono>
+#endif
+
+/* Tell whether content of the given MIME type is worth compressing.
+ *
+ * Returns true when the type starts with "text", carries a "+xml" or
+ * "+json" marker anywhere in its name, or is exactly
+ * "application/javascript" or "application/json".
+ */
+bool zim::isCompressibleMimetype(const std::string& mimetype)
+{
+  // A hit at position 0 means the type begins with "text".
+  if (mimetype.find("text") == 0)
+    return true;
+
+  if (mimetype.find("+xml") != std::string::npos)
+    return true;
+
+  if (mimetype.find("+json") != std::string::npos)
+    return true;
+
+  return mimetype == "application/javascript"
+      || mimetype == "application/json";
+}
+
+/* Count the whitespace-separated words of `text`.
+ *
+ * A word is a maximal run of characters for which std::isspace() is false.
+ * Leading, trailing and repeated whitespace is ignored; an empty or
+ * all-whitespace string yields 0.
+ */
+uint32_t zim::countWords(const std::string& text)
+{
+  // std::isspace() has undefined behaviour for negative values other than
+  // EOF, and bytes of UTF-8 encoded text are negative where char is signed,
+  // so the character must go through unsigned char first.
+  const auto isSpace = [](char c) {
+    return std::isspace(static_cast<unsigned char>(c)) != 0;
+  };
+
+  // Use the string's own size type so long inputs are not truncated.
+  const std::string::size_type length = text.size();
+  std::string::size_type i = 0;
+  uint32_t numWords = 0;
+
+  // Find first word
+  while ( i < length && isSpace(text[i]) ) i++;
+
+  while ( i < length ) {
+    // Find end of word
+    while ( i < length && !isSpace(text[i]) ) i++;
+    numWords++;
+    // Find start of next word
+    while ( i < length && isSpace(text[i]) ) i++;
+  }
+  return numWords;
+}
+
+
+/* Suspend the calling thread for (at least) `microseconds` microseconds.
+ *
+ * MinGW builds use nanosleep(); all other builds use
+ * std::this_thread::sleep_for().
+ */
+void zim::microsleep(int microseconds) {
+#ifdef __MINGW32__
+   struct timespec wait = {0, 0};
+   wait.tv_sec = microseconds / 1000000;
+   // Only the sub-second remainder goes into tv_nsec. The previous
+   // computation subtracted tv_sec*10000 instead of tv_sec*1000000, so any
+   // request of one second or more gave nanosleep() an out-of-range tv_nsec.
+   wait.tv_nsec = (microseconds % 1000000) * 1000;
+   nanosleep(&wait, nullptr);
+#else
+   std::this_thread::sleep_for(std::chrono::microseconds(microseconds));
+#endif
+}
+
+
+/* Split a long path of the form "[/]<ns>/<path>" into (namespace, path).
+ *
+ * The namespace is a single character. It may stand alone ("A") or be
+ * followed by '/' and the short path ("A/foo"). One leading '/' is ignored.
+ * Throws std::runtime_error when the input does not have this shape.
+ */
+std::tuple<char, std::string> zim::parseLongPath(const std::string& longPath)
+{
+  const auto length = longPath.size();
+
+  // Position of the namespace character, skipping the '/' of absolute paths.
+  const unsigned int nsPos = (longPath[0] == '/') ? 1 : 0;
+
+  // There must be a namespace character at all...
+  if (nsPos + 1 > length)
+    throw std::runtime_error("Cannot parse path");
+
+  // ...it must not itself be a separator...
+  if (longPath[nsPos] == '/')
+    throw std::runtime_error("Cannot parse path");
+
+  // ...and whatever follows it must start with a separator.
+  if (nsPos + 1 < length && longPath[nsPos + 1] != '/')
+    throw std::runtime_error("Cannot parse path");
+
+  // The short path starts after "<ns>/", clamped for a bare namespace.
+  const unsigned int pathStart = std::min<unsigned int>(nsPos + 2, (unsigned int)length);
+
+  return std::make_tuple(longPath[nsPos], longPath.substr(pathStart));
+}
+
+/* Extract the size from an illustration path "Illustration_<w>x<h>@1".
+ *
+ * The whole string must match, width and height must be equal and
+ * non-negative, and neither number may be preceded by whitespace.
+ * Returns the width; throws std::runtime_error (with an empty message)
+ * for anything else.
+ */
+unsigned int zim::parseIllustrationPathToSize(const std::string& s)
+{
+  // %n records how many characters were consumed so far: where each number
+  // starts, and where the match ends.
+  int widthPos = 0, heightPos = 0, endPos = 0;
+  long int width = -1, height = -1;
+
+  const int converted = sscanf(s.c_str(), "Illustration_%n%ldx%n%ld@1%n)",
+                               &widthPos, &width, &heightPos, &height, &endPos);
+
+  const bool valid =
+       converted == 2                    // both numbers were read
+    && (size_t)endPos == s.size()        // nothing left after "@1"
+    && !isspace(s[widthPos])             // %ld silently skips whitespace;
+    && !isspace(s[heightPos])            // reject it explicitly
+    && width == height
+    && width >= 0;
+
+  if (!valid)
+    throw std::runtime_error("");
+
+  return (unsigned int)width;
+}
+
+/* Return a uniformly distributed random number in the closed range [0, max].
+ *
+ * The engine is created and seeded from the system clock on first use, and
+ * is shared by all callers. Access to it is serialised with a mutex, so the
+ * function may be called from several threads.
+ */
+uint32_t zim::randomNumber(uint32_t max)
+{
+  static std::default_random_engine random(
+    std::chrono::system_clock::now().time_since_epoch().count());
+  static std::mutex mutex;
+
+  std::lock_guard<std::mutex> l(mutex);
+  // Scaling random()/random.max() by `max` and truncating returned `max`
+  // only when the engine produced its very maximum, so the upper bound was
+  // practically never reached (randomNumber(1) was almost always 0).
+  // uniform_int_distribution covers the whole closed range evenly.
+  return std::uniform_int_distribution<uint32_t>(0, max)(random);
+}
+
+/* Cut `str` into tokens at any character contained in `delims`.
+ *
+ * Runs of delimiters count as one separator, and delimiters at either end
+ * of the string are ignored, so no empty token is ever produced.
+ */
+std::vector<std::string> zim::split(const std::string & str,
+                                const std::string & delims)
+{
+  std::vector<std::string> tokens;
+
+  // `start` is the first character of the next token, npos once exhausted.
+  std::string::size_type start = str.find_first_not_of(delims, 0);
+  while (start != std::string::npos)
+    {
+      // `stop` is the delimiter ending this token (npos for the last one).
+      const std::string::size_type stop = str.find_first_of(delims, start);
+      tokens.push_back(str.substr(start, stop - start));
+      start = str.find_first_not_of(delims, stop);
+    }
+
+  return tokens;
+}
+
+/* Parse a "name:number;name:number;..." string into a map.
+ *
+ * Each ';'-separated element is split on ':'; the first part is the key and
+ * the second is converted with atoi(). Elements that do not have both a key
+ * and a value (e.g. "name", "name:" or ":3") are skipped. When a key appears
+ * several times the first occurrence wins.
+ */
+std::map<std::string, int> zim::read_valuesmap(const std::string &s) {
+    std::map<std::string, int> result;
+    for (const auto& elem : split(s, ";"))
+    {
+        const std::vector<std::string> tmp_elems = split(elem, ":");
+        // split() drops empty tokens, so a malformed element yields fewer
+        // than two parts; indexing tmp_elems[1] would then be out of bounds.
+        if (tmp_elems.size() < 2) {
+            continue;
+        }
+        result.insert( std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())) );
+    }
+    return result;
+}
+
+// Xapian based tools
+#if defined(ENABLE_XAPIAN)
+
+#include "xapian.h"
+
+#include <unicode/translit.h>
+#include <unicode/ucnv.h>
+#include <unicode/putil.h>
+/* Return a lower-cased copy of `text` with combining marks removed.
+ *
+ * The work is done by an ICU transliterator built from the rule
+ * "Lower; NFD; [:M:] remove; NFC". It is created once, on the first call,
+ * and reused afterwards.
+ */
+std::string zim::removeAccents(const std::string& text)
+{
+  // Set before the UnicodeString below is built from raw bytes.
+  // Presumably this makes ICU read `text` as UTF-8 -- confirm against ICU.
+  ucnv_setDefaultName("UTF-8");
+  static UErrorCode status = U_ZERO_ERROR;
+  // NOTE(review): `status` is never inspected. If createInstance() can fail
+  // and return null, the dereference below would crash -- confirm.
+  static std::unique_ptr<icu::Transliterator> removeAccentsTrans(icu::Transliterator::createInstance(
+      "Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status));
+  icu::UnicodeString ustring(text.c_str());
+  // Transliteration happens in place on `ustring`.
+  removeAccentsTrans->transliterate(ustring);
+  std::string unaccentedText;
+  ustring.toUTF8String(unaccentedText);
+  return unaccentedText;
+}
+
+/* Open the Xapian database embedded in a file at a given offset.
+ *
+ * `accessInfo.first` is the file name and `accessInfo.second` the offset of
+ * the database inside that file. On success `database` is assigned the
+ * opened database and true is returned. On any failure a diagnostic is
+ * printed to std::cerr, `database` is left untouched and false is returned.
+ */
+bool zim::getDbFromAccessInfo(zim::Item::DirectAccessInfo accessInfo, Xapian::Database& database) {
+  zim::DEFAULTFS::FD databasefd;
+  try {
+      databasefd = zim::DEFAULTFS::openFile(accessInfo.first);
+  } catch (...) {
+      std::cerr << "Impossible to open " << accessInfo.first << std::endl;
+      std::cerr << strerror(errno) << std::endl;
+      return false;
+  }
+  if (!databasefd.seek(zim::offset_t(accessInfo.second))) {
+      std::cerr << "Something went wrong seeking databasedb "
+                << accessInfo.first << std::endl;
+      std::cerr << "dbOffset = " << accessInfo.second << std::endl;
+      return false;
+  }
+
+  try {
+      // The descriptor is handed over with release(), so `databasefd` no
+      // longer owns it after this line.
+      database = Xapian::Database(databasefd.release());
+  } catch( const Xapian::DatabaseError& e) {
+      std::cerr << "Something went wrong opening xapian database for zimfile "
+                << accessInfo.first << std::endl;
+      std::cerr << "dbOffset = " << accessInfo.second << std::endl;
+      std::cerr << "error = " << e.get_msg() << std::endl;
+      return false;
+  }
+
+  return true;
+}
+
+/* Forward `path` to ICU's u_setDataDirectory().
+ *
+ * NOTE(review): unlike the other functions of this file, this one is defined
+ * in the global namespace, not in zim:: -- confirm that this matches its
+ * declaration.
+ */
+void setICUDataDirectory(const std::string& path)
+{
+  u_setDataDirectory(path.c_str());
+}
+#endif
diff --git a/src/tools.h b/src/tools.h
new file mode 100644
index 0000000..a42d4fd
--- /dev/null
+++ b/src/tools.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2016-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2013-2016 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_TOOLS_H
+#define OPENZIM_LIBZIM_TOOLS_H
+
+#include <string>
+#include <tuple>
+#include <map>
+#include <vector>
+#include "config.h"
+
+#include <zim/item.h>
+
+#if defined(ENABLE_XAPIAN)
+namespace Xapian {
+  class Database;
+}
+#endif  // ENABLE_XAPIAN
+namespace zim {
+  /** Tell whether content of this MIME type is considered compressible:
+   *  types starting with "text", containing "+xml" or "+json", and the
+   *  exact types "application/javascript" and "application/json".
+   **/
+  bool isCompressibleMimetype(const std::string& mimetype);
+
+  /** Count the whitespace-separated words of `text`. **/
+  uint32_t countWords(const std::string& text);
+
+  /** Suspend the calling thread for the given number of microseconds. **/
+  void microsleep(int microseconds);
+
+  /** Split "[/]<ns>/<path>" into its one-character namespace and the path.
+   *
+   * Throws std::runtime_error if the input does not have this shape.
+   **/
+  std::tuple<char, std::string> parseLongPath(const std::string& longPath);
+
+  // Parse a illustration path ("Illustration_<width>x<height>@1") to a size.
+  // Width and height must be equal; throws std::runtime_error otherwise.
+  unsigned int parseIllustrationPathToSize(const std::string& s);
+
+  /** Return a random number from range [0, max]
+   *
+   * This function is threadsafe
+   **/
+  uint32_t randomNumber(uint32_t max);
+
+  /** Cut `str` into tokens at any character of `delims`.
+   *
+   * Consecutive delimiters act as one separator; no empty token is returned.
+   **/
+  std::vector<std::string> split(const std::string & str,
+                                const std::string & delims=" *-");
+
+  /** Parse a "name:number;name:number" string into a map. **/
+  std::map<std::string, int> read_valuesmap(const std::string& s);
+
+// Xapian based tools
+#if defined(ENABLE_XAPIAN)
+  /** Return `text` lower-cased and stripped of combining marks. **/
+  std::string removeAccents(const std::string& text);
+
+  /** Open into `database` the Xapian database located at the file/offset
+   *  given by `accessInfo`. Returns false (after printing a diagnostic to
+   *  std::cerr) on failure.
+   **/
+  bool getDbFromAccessInfo(Item::DirectAccessInfo accessInfo, Xapian::Database& database);
+#endif
+}
+
+#endif  // OPENZIM_LIBZIM_TOOLS_H
diff --git a/src/uuid.cpp b/src/uuid.cpp
new file mode 100644
index 0000000..950db38
--- /dev/null
+++ b/src/uuid.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2018-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/uuid.h>
+#include <iostream>
+#include <sstream>
+#include <time.h>
+#include <zim/zim.h> // necessary to have the new types
+#include "log.h"
+#include "md5.h"
+
+#ifdef _WIN32
+
+#  include <time.h>
+#  include <windows.h>
+// Minimal Windows stand-in for POSIX gettimeofday(), built on timeGetTime().
+// The value is based on timeGetTime()'s millisecond counter, not on the
+// calendar time; in this file it is only used as input for Uuid::generate().
+// `tzp` is ignored. Always returns 0.
+int gettimeofday(struct timeval* tp, void* tzp) {
+    (void)tzp;
+    const DWORD t = timeGetTime();  // milliseconds
+    tp->tv_sec = t / 1000;
+    // tv_usec is expressed in microseconds: convert the millisecond
+    // remainder instead of storing it as is.
+    tp->tv_usec = (t % 1000) * 1000;
+    return 0;
+}
+
+#define getpid GetCurrentProcessId
+
+#else
+#  include <sys/time.h>
+#endif
+
+log_define("zim.uuid")
+
+namespace zim
+{
+  namespace
+  {
+    // Lower-case hexadecimal digits, indexed by nibble value.
+    char hex[] = "0123456789abcdef";
+
+    // Hexadecimal digit for the upper four bits of `v`.
+    inline char hi(char v)
+    {
+      const int nibble = (v >> 4) & 0xf;
+      return hex[nibble];
+    }
+
+    // Hexadecimal digit for the lower four bits of `v`.
+    inline char lo(char v)
+    {
+      const int nibble = v & 0xf;
+      return hex[nibble];
+    }
+  }
+
+  // Build a Uuid from the MD5 digest of some input.
+  //
+  // With a non-empty `value` the digest is computed over that string, so
+  // the same value always yields the same Uuid. With an empty `value` the
+  // digest is computed over the current clock() and gettimeofday() readings.
+  Uuid Uuid::generate(std::string value)
+  {
+    Uuid ret;
+    struct zim_MD5_CTX md5ctx;
+    zim_MD5Init(&md5ctx);
+
+    if ( !value.empty() ) {
+      const uint8_t* bytes = reinterpret_cast<const uint8_t*>(value.data());
+      zim_MD5Update(&md5ctx, bytes, value.size());
+    } else {
+      struct timeval now;
+      gettimeofday(&now, 0);
+
+      clock_t ticks = clock();
+
+      // Hash the raw bytes of both time readings, clock() first.
+      zim_MD5Update(&md5ctx, reinterpret_cast<const uint8_t*>(&ticks), sizeof(clock_t));
+      zim_MD5Update(&md5ctx, reinterpret_cast<const uint8_t*>(&now), sizeof(struct timeval));
+    }
+
+    // The digest is written straight into the Uuid's storage.
+    zim_MD5Final(reinterpret_cast<uint8_t*>(&ret.data[0]), &md5ctx);
+
+    log_debug("generated uuid: " << ret.data);
+
+    return ret;
+  }
+
+  // Textual form of the Uuid, identical to what operator<< writes.
+  Uuid::operator std::string() const
+  {
+    std::ostringstream out;
+    // Call the Uuid inserter by its qualified name.
+    zim::operator<<(out, *this);
+    return out.str();
+  }
+
+  // Write the 16 bytes of `uuid` as lower-case hexadecimal, grouped as
+  // 4-2-2-2-6 bytes separated by '-'.
+  std::ostream& operator<< (std::ostream& out, const Uuid& uuid)
+  {
+    // Exclusive end index of each of the five groups.
+    static const unsigned groupEnds[] = { 4, 6, 8, 10, 16 };
+
+    unsigned n = 0;
+    for (unsigned groupEnd : groupEnds)
+    {
+      if (n != 0)
+        out << '-';
+      for (; n < groupEnd; ++n)
+        out << hi(uuid.data[n]) << lo(uuid.data[n]);
+    }
+    return out;
+  }
+
+}
diff --git a/src/version.cpp b/src/version.cpp
new file mode 100644
index 0000000..5f6b077
--- /dev/null
+++ b/src/version.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2021 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <iostream>
+#include <sstream>
+
+#include <zim/version.h>
+#include <zim/zim_config.h>
+#include <config.h>
+#include <zstd.h>
+#include <lzma.h>
+
+#if defined(ENABLE_XAPIAN)
+#include <xapian.h>
+#include <unicode/uversion.h>
+#endif
+
+namespace zim
+{
+  // List the name and version of libzim and of the libraries it was built
+  // against. libzim always comes first; the Xapian and ICU entries are only
+  // present when the library is built with ENABLE_XAPIAN.
+  LibVersions getVersions() {
+    LibVersions versions;
+    versions.push_back({ "libzim",  LIBZIM_VERSION      });
+    versions.push_back({ "libzstd", ZSTD_VERSION_STRING });
+    versions.push_back({ "liblzma", LZMA_VERSION_STRING });
+
+#if defined(ENABLE_XAPIAN)
+    // Xapian is an optional dependency.
+    versions.push_back({ "libxapian", XAPIAN_VERSION });
+
+    // U_ICU_VERSION omits a patch level of 0, so the version string is
+    // assembled from its three numeric components instead.
+    std::ostringstream libicu_version;
+    libicu_version << U_ICU_VERSION_MAJOR_NUM;
+    libicu_version << "." << U_ICU_VERSION_MINOR_NUM;
+    libicu_version << "." << U_ICU_VERSION_PATCHLEVEL_NUM;
+    versions.push_back({ "libicu", libicu_version.str() });
+#endif
+
+    return versions;
+  }
+
+// Print one "<name> <version>" line per entry of getVersions() to `out`.
+// Every line but the first is prefixed with "+ ".
+void printVersions(std::ostream& out) {
+  const LibVersions versions = getVersions();
+  // The prefix is chosen by position. Comparing each entry with
+  // versions.front() by value would also drop the prefix of any later entry
+  // that happened to equal the first one.
+  const char* prefix = "";
+  for (const auto& iter : versions) {
+    out << prefix << iter.first << " " << iter.second << std::endl;
+    prefix = "+ ";
+  }
+}
+
+} //namespace zim
diff --git a/src/writer/_dirent.h b/src/writer/_dirent.h
new file mode 100644
index 0000000..7028013
--- /dev/null
+++ b/src/writer/_dirent.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_DIRENT_H
+#define ZIM_WRITER_DIRENT_H
+
+#include "cluster.h"
+#include "tinyString.h"
+
+#include "debug.h"
+
+namespace zim
+{
+  namespace writer {
+    class Dirent;
+
+    // Namespaces a dirent can belong to.
+    // Make sure the enum values are sorted in "alphabetical" order.
+    // The four values fit in the 2-bit `_ns` bitfield of Dirent.
+    enum class NS: uint8_t {
+      C = 0,
+      M = 1,
+      W = 2,
+      X = 3
+    };
+
+    char NsAsChar(NS ns);
+
+    // Tagged union holding the "target" part of a Dirent.
+    //
+    // Exactly one of three alternatives is alive at a time, as recorded in
+    // `tag`:
+    //  - Direct:   a cluster pointer and a blob number,
+    //  - Redirect: a target namespace and path,
+    //  - Resolved: a pointer to the target Dirent.
+    // The getters assert that the requested alternative is the active one.
+    // No copy or move constructor is declared here; an instance is built
+    // from an rvalue of one of the three alternatives.
+    class DirentInfo {
+      public: // structures
+        struct Direct {
+          Direct() :
+            cluster(nullptr),
+            blobNumber(0)
+          {};
+          Cluster*         cluster;
+          blob_index_t     blobNumber;
+        } PACKED;
+
+        struct Redirect {
+          Redirect(NS ns, const std::string& target) :
+            targetPath(target),
+            ns(ns)
+          {};
+          Redirect(Redirect&& r) = default;
+          ~Redirect() {};
+          TinyString targetPath;
+          NS ns;
+        } PACKED;
+
+        struct Resolved {
+          Resolved(const Dirent* target) :
+            targetDirent(target)
+          {};
+          const Dirent* targetDirent;
+        } PACKED;
+
+      public: // functions
+        // Union members are not destroyed automatically: run the destructor
+        // of whichever alternative `tag` says is alive.
+        ~DirentInfo() {
+          switch(tag) {
+            case DIRECT:
+             direct.~Direct();
+              break;
+            case REDIRECT:
+              redirect.~Redirect();
+              break;
+            case RESOLVED:
+              resolved.~Resolved();
+              break;
+          }
+        };
+        // Each constructor moves its argument into the matching union member
+        // and sets `tag` accordingly.
+        DirentInfo(Direct&& d):
+          direct(std::move(d)),
+          tag(DirentInfo::DIRECT)
+        {}
+        DirentInfo(Redirect&& r):
+          redirect(std::move(r)),
+          tag(DirentInfo::REDIRECT)
+        {}
+        DirentInfo(Resolved&& r):
+          resolved(std::move(r)),
+          tag(DirentInfo::RESOLVED)
+        {}
+        // Checked accessors: each one asserts that `tag` matches.
+        DirentInfo::Direct& getDirect() {
+          ASSERT(tag, ==, DIRECT);
+          return direct;
+        }
+        DirentInfo::Redirect& getRedirect() {
+          ASSERT(tag, ==, REDIRECT);
+          return redirect;
+        }
+        DirentInfo::Resolved& getResolved() {
+          ASSERT(tag, ==, RESOLVED);
+          return resolved;
+        }
+        const DirentInfo::Direct& getDirect() const {
+          ASSERT(tag, ==, DIRECT);
+          return direct;
+        }
+        const DirentInfo::Redirect& getRedirect() const {
+          ASSERT(tag, ==, REDIRECT);
+          return redirect;
+        }
+        const DirentInfo::Resolved& getResolved() const {
+          ASSERT(tag, ==, RESOLVED);
+          return resolved;
+        }
+
+      private: // members
+        union {
+          Direct direct;
+          Redirect redirect;
+          Resolved resolved;
+        } PACKED;
+
+      public: // members
+        // Discriminator telling which union member is alive.
+        enum : char {DIRECT, REDIRECT, RESOLVED} tag;
+    } PACKED;
+
+    // Writer-side directory entry: namespace, path and title of an entry,
+    // plus either the location of its content (cluster + blob number) or
+    // the target it redirects to.
+    //
+    // An entry is a redirect when its mime type is `redirectMimeType`.
+    // A redirect first stores the target's namespace and path; setRedirect()
+    // later replaces them with a pointer to the target Dirent.
+    class Dirent
+    {
+        static const uint16_t redirectMimeType = 0xffff;
+        static const uint32_t version = 0;
+
+        PathTitleTinyString pathTitle;
+        uint16_t mimeType;
+        entry_index_t idx = entry_index_t(0);
+        DirentInfo info;
+        offset_t offset;
+        // Holds an NS value; two bits are enough for its four enumerators.
+        uint8_t _ns : 2;
+        bool removed : 1;
+        bool frontArticle : 1;
+
+      public:
+        // Creator for a "classic" dirent
+        Dirent(NS ns, const std::string& path, const std::string& title, uint16_t mimetype);
+
+        // Creator for a "redirection" dirent
+        Dirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);
+
+        // Creator for "temporary" dirent, used to search for dirent in container.
+        // We use them in url ordered container so we only need to set the namespace and the path.
+        // Other value are irrelevant.
+        Dirent(NS ns, const std::string& path)
+          : Dirent(ns, path, "", 0)
+          { }
+
+        NS getNamespace() const           { return static_cast<NS>(_ns); }
+        std::string getTitle() const      { return pathTitle.getTitle(false); }
+        std::string getRealTitle() const      { return pathTitle.getTitle(true); }
+        std::string getPath() const       { return pathTitle.getPath(); }
+
+        uint32_t getVersion() const            { return version; }
+
+        NS getRedirectNs() const;
+        std::string getRedirectPath() const;
+        // Turn an unresolved redirect into a resolved one. `info` is
+        // destroyed and rebuilt in place, because DirentInfo is only
+        // constructible from one of its alternatives.
+        void setRedirect(const Dirent* target) {
+          ASSERT(info.tag, ==, DirentInfo::REDIRECT);
+          info.~DirentInfo();
+          new(&info) DirentInfo(DirentInfo::Resolved(target));
+        }
+        // Index of the redirect's target; asserts (through getResolved())
+        // that setRedirect() has been called.
+        entry_index_t getRedirectIndex() const      {
+          return info.getResolved().targetDirent->getIdx();
+        }
+
+        void setIdx(entry_index_t idx_)      { idx = idx_; }
+        entry_index_t getIdx() const         { return idx; }
+
+
+        // Attach this dirent to `_cluster`. The blob number is set to the
+        // cluster's current blob count.
+        // NOTE(review): presumably called just before the content is added
+        // to the cluster -- verify against the caller.
+        void setCluster(zim::writer::Cluster* _cluster)
+        {
+          auto& direct = info.getDirect();
+          direct.cluster = _cluster;
+          direct.blobNumber = _cluster->count();
+        }
+
+        zim::writer::Cluster* getCluster()
+        {
+          return info.getDirect().cluster;
+        }
+
+        // Index of the attached cluster, or 0 when no cluster is attached.
+        cluster_index_t getClusterNumber() const {
+          auto& direct = info.getDirect();
+          return direct.cluster ? direct.cluster->getClusterIndex() : cluster_index_t(0);
+        }
+        blob_index_t  getBlobNumber() const {
+          return info.getDirect().blobNumber;
+        }
+
+        bool isRedirect() const                 { return mimeType == redirectMimeType; }
+        bool isItem() const                     { return !isRedirect(); }
+        uint16_t getMimeType() const            { return mimeType; }
+        void setMimeType(uint16_t m) {
+          ASSERT(info.tag, ==, DirentInfo::DIRECT);
+          mimeType = m;
+        }
+        // Size computed as a fixed part (12 for a redirect, 16 otherwise)
+        // plus pathTitle.size() + 1.
+        // NOTE(review): presumably the serialized size written by write() --
+        // confirm against its implementation.
+        size_t getDirentSize() const
+        {
+          return (isRedirect() ? 12 : 16) + pathTitle.size() + 1;
+        }
+
+        offset_t getOffset() const { return offset; }
+        void setOffset(offset_t o) { offset = o; }
+
+        bool isRemoved() const { return removed; }
+        void markRemoved() { removed = true; }
+
+        bool isFrontArticle() const { return frontArticle; }
+        void setFrontArticle() { frontArticle = true; }
+
+        void write(int out_fd) const;
+
+        friend bool compareUrl(const Dirent* d1, const Dirent* d2);
+        friend inline bool compareTitle(const Dirent* d1, const Dirent* d2);
+    } PACKED;
+
+
+    // Strict ordering of dirents by namespace first, then by path.
+    inline bool compareUrl(const Dirent* d1, const Dirent* d2)
+    {
+      const NS ns1 = d1->getNamespace();
+      const NS ns2 = d2->getNamespace();
+      if (ns1 != ns2) {
+        return ns1 < ns2;
+      }
+      return d1->getPath() < d2->getPath();
+    }
+    // Strict ordering of dirents by namespace first, then by title.
+    inline bool compareTitle(const Dirent* d1, const Dirent* d2)
+    {
+      const NS ns1 = d1->getNamespace();
+      const NS ns2 = d2->getNamespace();
+      if (ns1 != ns2) {
+        return ns1 < ns2;
+      }
+      return d1->getTitle() < d2->getTitle();
+    }
+  }
+}
+
+#endif // ZIM_WRITER_DIRENT_H
+
diff --git a/src/writer/cluster.cpp b/src/writer/cluster.cpp
new file mode 100644
index 0000000..528f8e5
--- /dev/null
+++ b/src/writer/cluster.cpp
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Veloman Yunkan
+ * Copyright (C) 2020 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "cluster.h"
+#include "../log.h"
+#include "../endian_tools.h"
+#include "../debug.h"
+#include "../compression.h"
+
+#include <zim/writer/contentProvider.h>
+
+#include <sstream>
+#include <fstream>
+
+#include <fcntl.h>
+#include <stdexcept>
+
+#ifdef _WIN32
+# include <io.h>
+#else
+# include <unistd.h>
+# define _write(fd, addr, size) ::write((fd), (addr), (size))
+#endif
+
+const zim::size_type MAX_WRITE_SIZE(4UL*1024*1024*1024-1);
+
+namespace zim {
+namespace writer {
+
+// Create an empty, non-extended cluster using the given compression.
+// The offset table starts with a single entry, 0: the start of the first
+// blob that will be added.
+Cluster::Cluster(Compression compression)
+  : compression(compression),
+    isExtended(false),
+    _size(0)
+{
+  blobOffsets.push_back(offset_t(0));
+}
+
+// Free the compressed buffer, if any. `compressed_data` only refers to the
+// buffer; the buffer itself is allocated in _compress() and owned by the
+// cluster.
+Cluster::~Cluster() {
+  if (compressed_data.data()) {
+    delete[] compressed_data.data();
+  }
+}
+
+// Drop both the raw content (offsets and providers) and the compressed
+// buffer.
+void Cluster::clear_data() {
+  clear_raw_data();
+  clear_compressed_data();
+}
+
+// Drop the offset table and the content providers. Swapping with empty
+// temporaries hands the old storage to the temporaries, which destroy it.
+void Cluster::clear_raw_data() {
+  Offsets().swap(blobOffsets);
+  ClusterProviders().swap(m_providers);
+}
+
+// Free the compressed buffer and reset `compressed_data` to an empty Blob,
+// so that the destructor does not free it a second time.
+void Cluster::clear_compressed_data() {
+  if (compressed_data.data()) {
+    delete[] compressed_data.data();
+    compressed_data = Blob();
+  }
+}
+
+// Mark the cluster as closed. For a compressed cluster the content is
+// compressed into `compressed_data` first, and the raw content (offsets and
+// providers) is then released. An uncompressed cluster keeps its raw
+// content, which write() still needs.
+void Cluster::close() {
+  if (getCompression() != Compression::None) {
+    // We must compress the content in a buffer.
+    compress();
+    clear_raw_data();
+  }
+  closed = true;
+}
+
+// True once close() has completed.
+bool Cluster::isClosed() const{
+  return closed;
+}
+
+// Uncompressed size of the cluster content: the offset table (one entry of
+// 8 bytes for an extended cluster, 4 bytes otherwise) plus the blob data.
+// Throws std::runtime_error if the cluster has already been closed.
+zsize_t Cluster::size() const
+{
+  if (isClosed()) {
+    throw std::runtime_error("oups");
+  }
+  const size_t offsetWidth = isExtended ? sizeof(uint64_t) : sizeof(uint32_t);
+  return zsize_t(blobOffsets.size() * offsetWidth) + _size;
+}
+
+// Emit the offset table through `writer`, one little-endian OFFSET_TYPE per
+// entry. Stored offsets are relative to the start of the blob data; the
+// emitted ones are shifted by the size of the table itself.
+template<typename OFFSET_TYPE>
+void Cluster::write_offsets(writer_t writer) const
+{
+  const size_type tableSize = blobOffsets.size() * sizeof(OFFSET_TYPE);
+  char encoded[sizeof(OFFSET_TYPE)];
+  for (const auto& stored : blobOffsets)
+  {
+    offset_t shifted = stored;
+    shifted.v += tableSize;
+    toLittleEndian(static_cast<OFFSET_TYPE>(shifted.v), encoded);
+    writer(Blob(encoded, sizeof(OFFSET_TYPE)));
+  }
+}
+
+// Emit the uncompressed cluster content through `writer`: the offset table
+// (64-bit entries for an extended cluster, 32-bit otherwise) followed by
+// the blob data.
+void Cluster::write_content(writer_t writer) const
+{
+  isExtended ? write_offsets<uint64_t>(writer)
+             : write_offsets<uint32_t>(writer);
+  write_data(writer);
+}
+
+// Compress the cluster content with the configured algorithm.
+// Only Zstd is handled; any other setting throws std::runtime_error.
+void Cluster::compress()
+{
+  if (getCompression() != Compression::Zstd) {
+    throw std::runtime_error("We cannot compress an uncompressed cluster");
+  }
+  _compress<ZSTD_INFO>();
+}
+
+// Run the whole cluster content (offset table + blob data) through a
+// Compressor<COMP_TYPE> and keep the result in `compressed_data`.
+template<typename COMP_TYPE>
+void Cluster::_compress()
+{
+  Compressor<COMP_TYPE> runner;
+  bool first = true;
+  // Called once per chunk produced by write_content().
+  auto writer = [&](const Blob& data) -> void {
+    if (first) {
+      // The compressor is initialised on the first chunk only, with that
+      // chunk's data pointer.
+      runner.init((char*)data.data());
+      first = false;
+    }
+    runner.feed(data.data(), data.size());
+  };
+  write_content(writer);
+  zsize_t size;
+  auto comp = runner.get_data(&size);
+  // Ownership of the buffer is taken over here; it is freed with delete[]
+  // in clear_compressed_data() or in the destructor.
+  compressed_data = Blob(comp.release(), size.v);
+}
+
+// Write the cluster to the file descriptor `out_fd`: one info byte
+// (compression type, plus 0x10 for an extended cluster) followed by the
+// content. The content is written uncompressed for Compression::None, or
+// taken from the buffer prepared by close() for Compression::Zstd.
+//
+// Throws std::runtime_error if a write fails or if the compression type is
+// not supported.
+void Cluster::write(int out_fd) const
+{
+  // Write a whole buffer to out_fd.
+  // The data can be pretty big (> 4Gb), especially with tests, and a single
+  // write cannot handle that much, so the buffer is written in chunks. A
+  // write may also be short; the loop goes on until everything is written.
+  auto write_all = [out_fd](const char* src, size_type to_write) -> void {
+    while (to_write) {
+      const size_type chunk_size = std::min(MAX_WRITE_SIZE, to_write);
+      const auto ret = _write(out_fd, src, chunk_size);
+      // The result must be checked before it is used: -1 would move `src`
+      // backwards and make `to_write` grow, and 0 would loop forever.
+      if (ret <= 0) {
+        throw std::runtime_error("Error writing");
+      }
+      src += ret;
+      to_write -= ret;
+    }
+  };
+
+  // write clusterInfo
+  char clusterInfo = 0;
+  if (isExtended) {
+    clusterInfo = 0x10;
+  }
+  clusterInfo += static_cast<uint8_t>(getCompression());
+  write_all(&clusterInfo, 1);
+
+  switch(getCompression())
+  {
+    case Compression::None:
+    {
+      // Stream the offset table and the blobs straight to the file.
+      auto writer = [&](const Blob& data) -> void {
+        write_all(data.data(), data.size());
+      };
+      write_content(writer);
+      break;
+    }
+
+    case Compression::Zstd:
+      {
+        // The content has already been compressed by close().
+        log_debug("compress data");
+        write_all(compressed_data.data(), compressed_data.size());
+        break;
+      }
+
+    default:
+      {
+        std::ostringstream msg;
+        // Print the flag as a number; a uint8_t would be streamed as a
+        // (usually unprintable) character.
+        msg << "invalid compression flag " << static_cast<unsigned int>(getCompression());
+        log_error(msg.str());
+        throw std::runtime_error(msg.str());
+      }
+  }
+}
+
+
+// Append one blob, whose content will be pulled from `provider` when the
+// cluster is written or compressed. The blob's end offset is recorded and
+// the blob count is incremented even for an empty blob, but a provider is
+// only kept when it has content.
+void Cluster::addContent(std::unique_ptr<ContentProvider> provider)
+{
+  const auto size = provider->getSize();
+
+  _size += size;
+  blobOffsets.push_back(offset_t(_size.v));
+  ++m_count;
+
+  // Once the data no longer fits 32-bit offsets the cluster stays extended.
+  if (_size.v > UINT32_MAX) {
+    isExtended = true;
+  }
+
+  if (size != 0) {
+    m_providers.push_back(std::move(provider));
+  }
+}
+
+// Append one blob whose content is the given string, by wrapping it in a
+// StringProvider.
+void Cluster::addContent(const std::string& data)
+{
+  std::unique_ptr<ContentProvider> stringContent(new StringProvider(data));
+  addContent(std::move(stringContent));
+}
+
+// Emit the content of every provider through `writer`, in insertion order.
+// Each provider is drained by calling feed() until it returns an empty
+// blob; the total fed must match the size the provider announced.
+void Cluster::write_data(writer_t writer) const
+{
+  for (const auto& provider : m_providers)
+  {
+    ASSERT(provider->getSize(), !=, 0U);
+    zim::size_type size = 0;
+    for (auto blob = provider->feed(); blob.size() != 0; blob = provider->feed())
+    {
+      size += blob.size();
+      writer(blob);
+    }
+    ASSERT(size, ==, provider->getSize());
+  }
+}
+
+} // writer
+} // zim
diff --git a/src/writer/cluster.h b/src/writer/cluster.h
new file mode 100644
index 0000000..ce6aa3a
--- /dev/null
+++ b/src/writer/cluster.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2017-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_CLUSTER_H_
+#define ZIM_WRITER_CLUSTER_H_
+
+#include <zim/zim.h>
+#include <zim/blob.h>
+#include <iostream>
+#include <vector>
+#include <functional>
+#include <atomic>
+
+#include <zim/writer/item.h>
+#include "../zim_types.h"
+#include "../debug.h"
+
+namespace zim {
+
+namespace writer {
+
+using writer_t = std::function<void(const Blob& data)>;
+class ContentProvider;
+
+// A cluster under construction: an ordered list of blobs that is written to
+// the output file as one unit, optionally compressed.
+//
+// Blobs are appended with addContent(). close() finalises the cluster
+// (compressing it if needed), after which write() can emit it.
+class Cluster {
+  typedef std::vector<offset_t> Offsets;
+  typedef std::vector<std::unique_ptr<ContentProvider>> ClusterProviders;
+
+
+  public:
+    Cluster(Compression compression);
+    virtual ~Cluster();
+
+    void setCompression(Compression c) { compression = c; }
+    Compression getCompression() const { return compression; }
+
+    // Append one blob, from a content provider or from a string.
+    void addContent(std::unique_ptr<ContentProvider> provider);
+    void addContent(const std::string& data);
+
+    // Number of blobs added so far (empty blobs included).
+    blob_index_t count() const  { return blob_index_t(m_count); }
+    // Uncompressed size (offset table + data); throws once closed.
+    zsize_t size() const;
+    offset_t getOffset() const { return offset; }
+    void setOffset(offset_t o) { offset = o; }
+    // True when the offset table uses 64-bit entries.
+    bool is_extended() const { return isExtended; }
+    void clear_data();
+    void close();
+    bool isClosed() const;
+
+    void setClusterIndex(cluster_index_t idx) { index = idx; }
+    cluster_index_t getClusterIndex() const { return index; }
+
+    // Size of blob `n`: the difference between two consecutive offsets.
+    // No bounds check is done on `n`.
+    zsize_t getBlobSize(blob_index_t n) const
+    { return zsize_t(blobOffsets[blob_index_type(n)+1].v - blobOffsets[blob_index_type(n)].v); }
+
+    offset_t getBlobOffset(blob_index_t n) const { return blobOffsets[n.v]; }
+    // Offset of the first blob's data from the start of the cluster:
+    // 1 (the info byte) plus the offset table, which has count()+1 entries.
+    // Only valid on a closed cluster.
+    offset_t getDataOffset() const {
+      ASSERT(bool(closed), ==, true);
+      return offset_t(1) + offset_t((count().v + 1) * (isExtended?sizeof(uint64_t):sizeof(uint32_t)));
+    }
+
+    // Write the info byte and the (possibly compressed) content to out_fd.
+    void write(int out_fd) const;
+
+  protected:
+    Compression compression;
+    cluster_index_t index;
+    bool isExtended;
+    // End offset of each blob, relative to the start of the blob data;
+    // preceded by a leading 0.
+    Offsets blobOffsets;
+    offset_t offset;
+    // Total size of the blob data, offset table excluded.
+    zsize_t _size;
+    ClusterProviders m_providers;
+    // Compressed content; the underlying buffer is owned by the cluster.
+    mutable Blob compressed_data;
+    // NOTE(review): not referenced by any member function defined in this
+    // header -- check whether it is still needed.
+    std::string tmp_filename;
+    std::atomic<bool> closed { false };
+    blob_index_type m_count { 0 };
+
+  private:
+    void write_content(writer_t writer) const;
+    template<typename OFFSET_TYPE>
+    void write_offsets(writer_t writer) const;
+    void write_data(writer_t writer) const;
+    void compress();
+    template<typename COMP_INFO>
+    void _compress();
+    void clear_raw_data();
+    void clear_compressed_data();
+};
+
+};
+
+};
+
+
+#endif //ZIM_WRITER_CLUSTER_H_
diff --git a/src/writer/clusterWorker.cpp b/src/writer/clusterWorker.cpp
new file mode 100644
index 0000000..f820bcd
--- /dev/null
+++ b/src/writer/clusterWorker.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "clusterWorker.h"
+
+#include "cluster.h"
+
+std::atomic<unsigned long> zim::writer::ClusterTask::waiting_task(0);
+
+namespace zim
+{
+  namespace writer
+  {
+
+    void ClusterTask::run(CreatorData* data) {  // `data` is unused: the task only acts on its own cluster
+      cluster->close();
+    }
+
+  }
+}
diff --git a/src/writer/clusterWorker.h b/src/writer/clusterWorker.h
new file mode 100644
index 0000000..66e0dcc
--- /dev/null
+++ b/src/writer/clusterWorker.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_CLUSTER_WORKER_H
+#define OPENZIM_LIBZIM_CLUSTER_WORKER_H
+
+#include <atomic>
+#include "workers.h"
+
+namespace zim {
+namespace writer {
+
+class Cluster;
+
+class ClusterTask : public Task {
+  // Task that closes one Cluster (see ClusterTask::run in clusterWorker.cpp).
+  // waiting_task counts the ClusterTask objects that currently exist.
+  public:
+    static std::atomic<unsigned long> waiting_task;
+
+    // Each instance is counted from construction to destruction.
+    explicit ClusterTask(Cluster* cluster) : cluster(cluster) { ++waiting_task; }
+    virtual ~ClusterTask() { --waiting_task; }
+
+    // Not copyable: a copy would decrement the counter without having incremented it.
+    ClusterTask(const ClusterTask&) = delete;
+    ClusterTask& operator=(const ClusterTask&) = delete;
+
+    // Calls close() on the cluster given at construction.
+    virtual void run(CreatorData* data);
+
+  private:
+    Cluster* cluster;  // not deleted by this class
+};
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_CLUSTER_WORKER_H
diff --git a/src/writer/contentProvider.cpp b/src/writer/contentProvider.cpp
new file mode 100644
index 0000000..0896d90
--- /dev/null
+++ b/src/writer/contentProvider.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/writer/contentProvider.h>
+
+#include "../fs.h"
+
+const zim::size_type BUFFER_SIZE(1024*1024);
+
+namespace zim
+{
+  namespace writer
+  {
+    Blob StringProvider::feed()  // one-shot: the whole string on the first call, an empty blob afterwards
+    {
+      if (!feeded) {
+        feeded = true;
+        return Blob(content.data(), content.size());
+      }
+      return Blob(nullptr, 0);
+    }
+
+    Blob SharedStringProvider::feed()  // one-shot: the pointed-to string on the first call, an empty blob afterwards
+    {
+      if (!feeded) {
+        feeded = true;
+        return Blob(content->data(), content->size());
+      }
+      return Blob(nullptr, 0);
+    }
+
+    FileProvider::FileProvider(const std::string& filepath)  // opens the file and caches its size for feed()
+      : filepath(filepath),
+        buffer(new char[BUFFER_SIZE]),  // BUFFER_SIZE bytes (1 MiB), handed out again by every feed() call
+        fd(new DEFAULTFS::FD(DEFAULTFS::openFile(filepath))),
+        offset(0)  // position of the next read
+    {
+      size = fd->getSize().v;
+    }
+
+    FileProvider::~FileProvider() = default;
+
+    Blob FileProvider::feed()
+    {
+      auto sizeToRead = std::min(BUFFER_SIZE, size-offset);
+      if (!sizeToRead) {
+        return Blob(nullptr, 0);
+      }
+
+      if(fd->readAt(buffer.get(), zim::zsize_t(sizeToRead), zim::offset_t(offset)).v == -1UL) {
+        throw std::runtime_error("Error reading file " + filepath);
+      }
+      offset += sizeToRead;
+      return Blob(buffer.get(), sizeToRead);
+    }
+  }
+}
diff --git a/src/writer/counterHandler.cpp b/src/writer/counterHandler.cpp
new file mode 100644
index 0000000..b29df05
--- /dev/null
+++ b/src/writer/counterHandler.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "counterHandler.h"
+#include "creatordata.h"
+
+#include <zim/writer/contentProvider.h>
+#include <zim/blob.h>
+
+using namespace zim::writer;
+
+CounterHandler::CounterHandler(CreatorData* data)  // keeps `data` (not owned) to create the Counter dirent later
+  : mp_creatorData(data)
+{}
+
+CounterHandler::~CounterHandler() = default;
+
+void CounterHandler::start() {  // nothing to prepare for this handler
+}
+
+void CounterHandler::stop() {  // nothing to finalize: the content is built in getContentProviders()
+}
+
+DirentHandler::Dirents CounterHandler::createDirents() const {  // one text/plain metadata entry named "Counter", empty title
+  Dirents created;
+  created.push_back(mp_creatorData->createDirent(NS::M, "Counter", "text/plain", ""));
+  return created;
+}
+
+DirentHandler::ContentProviders CounterHandler::getContentProviders() const {
+  // Serializes the counters as "mimetype=count" pairs joined with ';' and
+  // returns them as the single provider matching the dirent from createDirents().
+  ContentProviders ret;
+  std::stringstream ss;
+  const char* separator = "";  // empty before the first pair, ";" afterwards
+  // Iterate by const reference: iterating by value copies every key string.
+  for(const auto& entry: m_mimetypeCounter) {
+    ss << separator << entry.first << "=" << entry.second;
+    separator = ";";
+  }
+  ret.push_back(std::unique_ptr<ContentProvider>(new StringProvider(ss.str())));
+  return ret;
+}
+
+void CounterHandler::handle(Dirent* dirent, const Hints& hints)  // no-op: this overload never updates the counters
+{
+}
+
+void CounterHandler::handle(Dirent* dirent, std::shared_ptr<Item> item)
+{
+  // Count one more item for its mimetype. Entries outside namespace C and
+  // items reporting an empty mimetype are ignored.
+  if (dirent->getNamespace() == NS::C) {
+    const auto mimetype = item->getMimeType();
+    if (!mimetype.empty()) {
+      m_mimetypeCounter[mimetype] += 1;
+    }
+  }
+}
diff --git a/src/writer/counterHandler.h b/src/writer/counterHandler.h
new file mode 100644
index 0000000..17eb48c
--- /dev/null
+++ b/src/writer/counterHandler.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_COUNTER_HANDLER_H
+#define OPENZIM_LIBZIM_COUNTER_HANDLER_H
+
+#include "handler.h"
+
+#include <map>
+
+namespace zim {
+namespace writer {
+
+
+class CounterHandler : public DirentHandler {  // handler producing the "Counter" metadata entry (items per mimetype)
+  public:
+    using Counter = std::map<std::string, entry_index_type>;  // mimetype -> number of counted items
+
+    explicit CounterHandler(CreatorData* data);
+    virtual ~CounterHandler();
+
+    void start() override;
+    void stop() override;
+    bool isCompressible() override { return true; }  // the generated text goes to a compressed cluster
+    ContentProviders getContentProviders() const override;
+    void handle(Dirent* dirent, std::shared_ptr<Item> item) override;  // counts namespace-C items
+    void handle(Dirent* dirent, const Hints& hints) override;  // no-op
+
+  private:
+    Dirents createDirents() const override;
+    CreatorData* mp_creatorData;  // not owned
+    Counter m_mimetypeCounter;
+};
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_COUNTER_HANDLER_H
diff --git a/src/writer/creator.cpp b/src/writer/creator.cpp
new file mode 100644
index 0000000..6eed73b
--- /dev/null
+++ b/src/writer/creator.cpp
@@ -0,0 +1,681 @@
+/*
+ * Copyright (C) 2019-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2021 Veloman Yunkan
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/writer/creator.h>
+
+#include "config.h"
+
+#include "creatordata.h"
+#include "cluster.h"
+#include "debug.h"
+#include "workers.h"
+#include "clusterWorker.h"
+#include <zim/blob.h>
+#include <zim/writer/contentProvider.h>
+#include "../endian_tools.h"
+#include <algorithm>
+#include <fstream>
+#include "../md5.h"
+#include "../constants.h"
+#include "counterHandler.h"
+
+#if defined(ENABLE_XAPIAN)
+# include "xapianHandler.h"
+#endif
+
+#ifdef _WIN32
+# include <io.h>
+# include <fcntl.h>
+#else
+# include <unistd.h>
+# define _write(fd, addr, size) if(::write((fd), (addr), (size)) != (ssize_t)(size)) \
+{throw std::runtime_error("Error writing");}
+#endif
+
+#include <sys/stat.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <limits>
+#include <stdexcept>
+#include <sstream>
+#include <ctime>
+#include "log.h"
+#include "../fs.h"
+#include "../tools.h"
+
+log_define("zim.writer.creator")
+
+#define INFO(e) /* passes e to log_info and also prints it, followed by a newline, on std::cout */ \
+    do { \
+        log_info(e); \
+        std::cout << e << std::endl; \
+    } while(false)
+
+#define TINFO(e) \
+    if (m_verbose) { \
+        double seconds = difftime(time(NULL), data->start_time); \
+        std::cout << "T:" << (int)(seconds) \
+                  << "; " << e << std::endl; \
+    }
+
+#define TPROGRESS() \
+    if (m_verbose ) { \
+        double seconds = difftime(time(NULL),data->start_time);  \
+        std::cout << "T:" << (int)seconds \
+                  << "; A:" << data->dirents.size() \
+                  << "; RA:" << data->nbRedirectItems \
+                  << "; CA:" << data->nbCompItems \
+                  << "; UA:" << data->nbUnCompItems \
+                  << "; C:" << data->nbClusters \
+                  << "; CC:" << data->nbCompClusters \
+                  << "; UC:" << data->nbUnCompClusters \
+                  << "; WC:" << data->taskList.size() \
+                  << std::endl; \
+    }
+
+
+#define CLUSTER_BASE_OFFSET 2048
+
+namespace zim
+{
+  namespace writer
+  {
+    Creator::Creator()  // only the cluster size is set here; `data` is created by startZimCreation()
+      : m_clusterSize(DEFAULT_CLUSTER_SIZE)
+    {}
+    Creator::~Creator() = default;
+
+    Creator& Creator::configVerbose(bool verbose)  // enables the TINFO/TPROGRESS output; returns *this for chaining
+    {
+      m_verbose = verbose;
+      return *this;
+    }
+
+    Creator& Creator::configCompression(Compression compression)  // stored, then passed to CreatorData by startZimCreation(); returns *this
+    {
+      m_compression = compression;
+      return *this;
+    }
+
+    Creator& Creator::configClusterSize(zim::size_type targetSize)  // size at which addItemData() closes a cluster; returns *this
+    {
+      m_clusterSize = targetSize;
+      return *this;
+    }
+
+    Creator& Creator::configIndexing(bool indexing, const std::string& language)  // both values are passed to CreatorData by startZimCreation(); returns *this
+    {
+      m_withIndex = indexing;
+      m_indexingLanguage = language;
+      return *this;
+    }
+
+    Creator& Creator::configNbWorkers(unsigned nbWorkers)  // number of taskRunner threads started by startZimCreation(); returns *this
+    {
+      m_nbWorkers = nbWorkers;
+      return *this;
+    }
+
+    void Creator::startZimCreation(const std::string& filepath)
+    {
+      // Build the shared state first (see CreatorData's constructor), then start the threads that use it.
+      data.reset(
+        new CreatorData(filepath, m_verbose, m_withIndex, m_indexingLanguage, m_compression, m_clusterSize)
+      );
+      CreatorData* const sharedState = data.get();
+
+      // m_nbWorkers threads run taskRunner; one more thread runs clusterWriter.
+      for(unsigned started=0; started<m_nbWorkers; ++started) {
+        data->workerThreads.push_back(std::thread(taskRunner, sharedState));
+      }
+      data->writerThread = std::thread(clusterWriter, sharedState);
+    }
+
+    void Creator::addItem(std::shared_ptr<Item> item)
+    {
+      // The item's COMPRESS hint selects the compressed or the uncompressed cluster.
+      const bool wantsCompression = item->getAmendedHints()[COMPRESS];
+      Dirent* const itemDirent = data->createItemDirent(item.get());
+      data->addItemData(itemDirent, item->getContentProvider(), wantsCompression);
+      data->handle(itemDirent, item);
+      // Progress report whenever the dirent count is a multiple of 1000 (printed only in verbose mode).
+      const bool atReportStep = (data->dirents.size()%1000 == 0);
+      if (atReportStep) { TPROGRESS(); }
+    }
+
+    void Creator::addMetadata(const std::string& name, const std::string& content, const std::string& mimetype)
+    {
+      // Convenience overload: wraps `content` in a StringProvider and delegates to the provider overload.
+      addMetadata(name, std::unique_ptr<ContentProvider>(new StringProvider(content)), mimetype);
+    }
+
+    void Creator::addMetadata(const std::string& name, std::unique_ptr<ContentProvider> provider, const std::string& mimetype)
+    {
+      const auto compressible = isCompressibleMimetype(mimetype);  // compression is decided by the mimetype alone
+      Dirent* const metaDirent = data->createDirent(NS::M, name, mimetype, "");  // namespace M, empty title
+      data->addItemData(metaDirent, std::move(provider), compressible);
+      data->handle(metaDirent);
+    }
+
+    void Creator::addIllustration(unsigned int size, const std::string& content)
+    {
+      // Convenience overload: wraps `content` in a StringProvider and delegates to the provider overload.
+      addIllustration(size, std::unique_ptr<ContentProvider>(new StringProvider(content)));
+    }
+
+    void Creator::addIllustration(unsigned int size, std::unique_ptr<ContentProvider> provider)
+    {
+      std::ostringstream metadataName;  // "Illustration_<size>x<size>@1", stored as image/png metadata
+      metadataName << "Illustration_" << size << "x" << size << "@1";
+      addMetadata(metadataName.str(), std::move(provider), "image/png");
+    }
+
+    void Creator::addRedirection(const std::string& path, const std::string& title, const std::string& targetPath, const Hints& hints)
+    {
+      // Both the redirect itself and its target are looked up in namespace C.
+      Dirent* const redirectDirent = data->createRedirectDirent(NS::C, path, title, NS::C, targetPath);
+      // Progress report whenever the dirent count is a multiple of 1000 (printed only in verbose mode).
+      const bool atReportStep = (data->dirents.size()%1000 == 0);
+      if (atReportStep) { TPROGRESS(); }
+      data->handle(redirectDirent, hints);
+    }
+
+    void Creator::finishZimCreation()  // finalizes dirents, flushes handlers and clusters, writes the trailing parts, renames the tmp file
+    {
+      // Create a redirection for the mainPage.
+      // We need to keep the created dirent to set the fileheader.
+      // Dirent doesn't have to be deleted.
+      if (!m_mainPath.empty()) {
+        data->mainPageDirent = data->createRedirectDirent(NS::W, "mainPage", "", NS::C, m_mainPath);
+        data->handle(data->mainPageDirent);
+      }
+
+      TPROGRESS();
+
+      // mp_titleListingHandler is a special case: it has to handle all dirents (including its own)
+      for(auto& handler:data->m_direntHandlers) {
+        // This silently creates all the needed dirents.
+        for(auto dirent:handler->getDirents()) {
+          data->mp_titleListingHandler->handle(dirent, Hints());
+        }
+      }
+
+      // Now that we have all the dirents (but not the data), we must correctly set/fix the dirents
+      // before we ask the handlers for their data
+      TINFO("ResolveRedirectIndexes");
+      data->resolveRedirectIndexes();
+
+      TINFO("Set entry indexes");
+      data->setEntryIndexes();
+
+      TINFO("Resolve mimetype");
+      data->resolveMimeTypes();
+
+      // We can now stop the direntHandlers, and get their content
+      bool titleListDirentSeen = false;
+      for(auto& handler:data->m_direntHandlers) {
+        handler->stop();
+        const auto& dirents = handler->getDirents();
+        if (dirents.empty()) {
+          continue;
+        }
+        auto providers = handler->getContentProviders();
+        ASSERT(dirents.size(), ==, providers.size());
+        auto provider_it = providers.begin();
+        for(auto& dirent:dirents) {
+          // As we use a "handler level" isCompressible, all content of the same handler
+          // must have the same compression.
+          data->addItemData(dirent, std::move(*provider_it), handler->isCompressible());
+          if (handler == data->mp_titleListingHandler && !titleListDirentSeen) {
+            // We have to get the offset of the titleList in the cluster before
+            // we close the cluster. Once the cluster is closed, the offset information is dropped.
+            // This works only if titleListingHandler creates the full (V0) titlelist in its first dirent.
+            data->m_titleListBlobOffset = data->uncompCluster->getBlobOffset(dirent->getBlobNumber());
+            titleListDirentSeen = true;
+          }
+          provider_it++;
+        }
+      }
+
+      // All the data has been added, we can now close all clusters
+      if (data->compCluster->count())
+        data->closeCluster(true);
+
+      if (data->uncompCluster->count())
+        data->closeCluster(false);
+
+      TINFO("Waiting for workers");
+      // wait until no ClusterTask is left (waiting_task counts the live ones), sleeping a bit longer each round
+      unsigned int wait = 0;
+      do {
+        microsleep(wait);
+        wait += 10;
+      } while(ClusterTask::waiting_task.load() > 0);
+
+      data->quitAllThreads();
+
+      // Delete all handlers (they will clean up their own data)
+      data->m_direntHandlers.clear();
+
+      TINFO(data->dirents.size() << " title index created");
+      TINFO(data->clustersList.size() << " clusters created");
+
+      TINFO("write zimfile :");  // plain ASCII: the literal used to contain a mis-encoded no-break space
+      writeLastParts();
+      ::close(data->out_fd);
+      data->out_fd = -1;  // tells ~CreatorData the descriptor is already closed
+
+      TINFO("rename tmpfile to final one.");
+      DEFAULTFS::rename(data->tmpFileName, data->zimName);
+      data->tmpFileName.clear();  // tells ~CreatorData there is no temporary file left to remove
+
+      TINFO("finish");
+    }
+
+    void Creator::fillHeader(Fileheader* header) const  // fills the header fields that are known before the trailing tables are written
+    {
+      header->setMainPage(
+        data->mainPageDirent
+        ? entry_index_type(data->mainPageDirent->getIdx())
+        : std::numeric_limits<entry_index_type>::max());  // max() is written when there is no main page dirent
+      header->setLayoutPage(std::numeric_limits<entry_index_type>::max());
+
+      header->setUuid( m_uuid );
+      header->setArticleCount( data->dirents.size() );
+
+      header->setMimeListPos( Fileheader::size );  // the mimetype list directly follows the header
+
+      // We assume here that titleListingHandler create the V0 listing in its first dirent.
+      auto cluster = data->mp_titleListingHandler->getDirents()[0]->getCluster();
+      header->setTitleIdxPos(  // cluster position + start of its data + offset of the blob recorded in finishZimCreation()
+        offset_type(cluster->getOffset() + cluster->getDataOffset() + data->m_titleListBlobOffset));
+
+      header->setClusterCount( data->clustersList.size() );
+    }
+
+    void Creator::writeLastParts() const  // writes mimetype list, dirents, pointer lists, header and MD5 checksum to data->out_fd
+    {
+      Fileheader header;
+      fillHeader(&header);
+
+      int out_fd = data->out_fd;
+
+      lseek(out_fd, header.getMimeListPos(), SEEK_SET);
+      TINFO(" write mimetype list");
+      for(auto& mimeType: data->mimeTypesList)
+      {
+        _write(out_fd, mimeType.c_str(), mimeType.size()+1);  // +1: the terminating NUL is written too
+      }
+
+      _write(out_fd, "", 1);  // a lone NUL byte ends the list
+
+      ASSERT(lseek(out_fd, 0, SEEK_CUR), <, CLUSTER_BASE_OFFSET);  // the list must end before CLUSTER_BASE_OFFSET
+
+      TINFO(" write directory entries");
+      lseek(out_fd, 0, SEEK_END);
+      for (Dirent* dirent: data->dirents)
+      {
+        dirent->setOffset(offset_t(lseek(out_fd, 0, SEEK_CUR)));  // remembered for the pointer list below
+        dirent->write(out_fd);
+      }
+
+      TINFO(" write url prt list");
+      header.setUrlPtrPos(lseek(out_fd, 0, SEEK_CUR));
+      for (auto& dirent: data->dirents)
+      {
+        char tmp_buff[sizeof(offset_type)];
+        toLittleEndian(dirent->getOffset(), tmp_buff);
+        _write(out_fd, tmp_buff, sizeof(offset_type));
+      }
+
+      TINFO(" write cluster offset list");
+      header.setClusterPtrPos(lseek(out_fd, 0, SEEK_CUR));
+      for (auto cluster : data->clustersList)
+      {
+        char tmp_buff[sizeof(offset_type)];
+        toLittleEndian(cluster->getOffset(), tmp_buff);
+        _write(out_fd, tmp_buff, sizeof(offset_type));
+      }
+
+      header.setChecksumPos(lseek(out_fd, 0, SEEK_CUR));  // the checksum goes right after the cluster pointer list
+
+      TINFO(" write header");
+      lseek(out_fd, 0, SEEK_SET);
+      header.write(out_fd);
+
+      TINFO(" write checksum");
+      struct zim_MD5_CTX md5ctx;
+      unsigned char batch_read[1024+1];
+      lseek(out_fd, 0, SEEK_SET);  // the digest covers everything written so far, from offset 0
+      zim_MD5Init(&md5ctx);
+      while (true) {
+         auto r = read(out_fd, batch_read, 1024);
+         if (r == -1) {
+           perror("Cannot read");
+           throw std::runtime_error("oups");
+         }
+         if (r == 0)  // end of file: the file position is now where the digest is appended
+           break;
+         batch_read[r] = 0;
+         zim_MD5Update(&md5ctx, batch_read, r);
+      }
+      unsigned char digest[16];
+      zim_MD5Final(digest, &md5ctx);
+      _write(out_fd, reinterpret_cast<const char*>(digest), 16);
+    }
+
+    CreatorData::CreatorData(const std::string& fname,  // final archive name; output first goes to fname + ".tmp"
+                                   bool verbose,
+                                   bool withIndex,
+                                   std::string language,
+                                   Compression c,  // compression used for the compressed clusters
+                                   size_t clusterSize)
+      : mainPageDirent(nullptr),
+        compression(c),
+        zimName(fname),
+        tmpFileName(fname + ".tmp"),
+        clusterSize(clusterSize),
+        withIndex(withIndex),
+        indexingLanguage(language),
+        verbose(verbose),
+        nbRedirectItems(0),
+        nbCompItems(0),
+        nbUnCompItems(0),
+        nbClusters(0),
+        nbCompClusters(0),
+        nbUnCompClusters(0),
+        start_time(time(NULL))
+    {
+#ifdef _WIN32
+      int flag = _O_RDWR | _O_CREAT | _O_TRUNC | _O_BINARY;
+      int mode =  _S_IREAD | _S_IWRITE;
+#else
+      int flag = O_RDWR | O_CREAT | O_TRUNC;
+      mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+#endif
+      out_fd = open(tmpFileName.c_str(), flag, mode);
+      if (out_fd == -1){
+        perror(nullptr);
+        std::ostringstream ss;
+        ss << "Cannot create file " << tmpFileName;
+        throw std::runtime_error(ss.str());
+      }
+      if(lseek(out_fd, CLUSTER_BASE_OFFSET, SEEK_SET) != CLUSTER_BASE_OFFSET) {  // skip the area kept for header and mimetype list
+        perror(nullptr);  // report first: close() may overwrite the errno left by lseek()
+        close(out_fd);
+        throw std::runtime_error("Impossible to seek in file");
+      }
+
+      // We keep both a "compressed cluster" and an "uncompressed cluster"
+      // because we don't know which one will fill up first.  We also need
+      // to track the dirents currently in each, so we can fix up the
+      // cluster index if the other one ends up written first.
+      compCluster = new Cluster(compression);
+      uncompCluster = new Cluster(Compression::None);
+
+#if defined(ENABLE_XAPIAN)
+      auto xapianIndexer = std::make_shared<XapianHandler>(this, withIndex);
+      m_direntHandlers.push_back(xapianIndexer);
+#endif
+
+      mp_titleListingHandler = std::make_shared<TitleListingHandler>(this);
+      m_direntHandlers.push_back(mp_titleListingHandler);
+      m_direntHandlers.push_back(std::make_shared<CounterHandler>(this));
+
+      for(auto& handler:m_direntHandlers) {
+        handler->start();
+      }
+    }
+
+    CreatorData::~CreatorData()
+    {
+      quitAllThreads();  // join the worker and writer threads before freeing the clusters
+      // `delete` on a null pointer is a no-op, so the two open clusters need no guard.
+      delete compCluster;
+      delete uncompCluster;
+      for(Cluster* closedCluster: clustersList) {
+        delete closedCluster;
+      }
+      if ( out_fd != - 1 ) {
+        ::close(out_fd);
+      }
+      // finishZimCreation() clears tmpFileName after the rename, so only a leftover file is removed.
+      if ( ! tmpFileName.empty() ) {
+        DEFAULTFS::removeFile(tmpFileName);
+      }
+    }
+
+    void CreatorData::quitAllThreads() {
+      // Queue one nullptr task per worker thread, then wait for every worker.
+      auto pendingSignals = workerThreads.size();
+      while (pendingSignals-- > 0) {
+        taskList.pushToQueue(nullptr);
+      }
+      for(auto& worker: workerThreads) {
+        worker.join();
+      }
+      workerThreads.clear();  // a later call (e.g. from the destructor) finds no worker left
+      // Same for the writer thread, unless it was never started or is already joined.
+      if (writerThread.joinable()) {
+        clusterToWrite.pushToQueue(nullptr);
+        writerThread.join();
+      }
+    }
+
+    void CreatorData::addDirent(Dirent* dirent)
+    {
+      const auto insertion = dirents.insert(dirent);
+      if (!insertion.second) {
+        // An equivalent entry is already registered; only "item replaces redirect" is allowed.
+        Dirent* const existing = *insertion.first;
+        const bool itemReplacesRedirect = existing->isRedirect() && !dirent->isRedirect();
+        if (!itemReplacesRedirect) {
+          std::ostringstream ss;
+          ss << "Impossible to add " << NsAsChar(dirent->getNamespace()) << "/" << dirent->getPath() << std::endl;
+          ss << "  dirent's title to add is : " << dirent->getTitle() << std::endl;
+          ss << "  existing dirent's title is : " << existing->getTitle() << std::endl;
+          throw std::runtime_error(ss.str());
+        }
+        unresolvedRedirectDirents.erase(existing);  // the old redirect no longer needs resolving
+        dirents.erase(insertion.first);
+        existing->markRemoved();
+        dirents.insert(dirent);
+      }
+      if (dirent->isRedirect()) {
+        unresolvedRedirectDirents.insert(dirent);
+        nbRedirectItems++;
+      }
+    }
+
+    void CreatorData::addItemData(Dirent* dirent, std::unique_ptr<ContentProvider> provider, bool compressContent)
+    {
+      // Appends the content to the currently open compressed or uncompressed
+      // cluster and records that cluster in the dirent.
+      const auto contentSize = provider->getSize();
+      // isEmpty is only ever cleared here, by the first non-empty content.
+      if (contentSize > 0) {
+        isEmpty = false;
+      }
+
+      Cluster* target = compressContent ? compCluster : uncompCluster;
+
+      // If a cluster that already holds content would reach the size limit, close
+      // it (it is queued to be written to disk) and put the content in a fresh one.
+      if (target->count() && target->size().v+contentSize >= clusterSize) {
+        log_info("cluster with " << target->count() << " items, " <<
+                 target->size() << " bytes; current title \"" <<
+                 dirent->getTitle() << '\"');
+        target = closeCluster(compressContent);
+      }
+
+      // The dirent learns its cluster before the content is handed over.
+      dirent->setCluster(target);
+      target->addContent(std::move(provider));
+
+      // Per-kind item statistics, shown by TPROGRESS.
+      if (!compressContent) {
+        nbUnCompItems++;
+      } else {
+        nbCompItems++;
+      }
+    }
+
+    Dirent* CreatorData::createDirent(NS ns, const std::string& path, const std::string& mimetype, const std::string& title)
+    {
+      auto created = pool.getClassicDirent(ns, path, title, getMimeTypeIdx(mimetype));  // the mimetype is stored as its index
+      addDirent(created);  // registers it; throws std::runtime_error on a conflicting entry
+      return created;
+    }
+
+    Dirent* CreatorData::createItemDirent(const Item* item)  // namespace-C dirent for `item`; an empty mimetype becomes application/octet-stream
+    {
+      const auto path = item->getPath();  // queried once, reused for the warning and for the dirent
+      auto mimetype = item->getMimeType();
+      if (mimetype.empty()) {
+        std::cerr << "Warning, " << path << " have empty mimetype." << std::endl;
+        mimetype = "application/octet-stream";
+      }
+      return createDirent(NS::C, path, mimetype, item->getTitle());
+    }
+
+    Dirent* CreatorData::createRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath)
+    {
+      auto created = pool.getRedirectDirent(ns, path, title, targetNs, targetPath);  // the target is resolved later, by resolveRedirectIndexes()
+      addDirent(created);  // registers it; throws std::runtime_error on a conflicting entry
+      return created;
+    }
+
+    Cluster* CreatorData::closeCluster(bool compressed)
+    {
+      // `slot` is whichever member holds the open cluster of the requested kind;
+      // `finished` is the cluster being retired.
+      Cluster*& slot = compressed ? compCluster : uncompCluster;
+      Cluster* const finished = slot;
+
+      nbClusters++;
+      if (compressed) {
+        nbCompClusters++;
+      } else {
+        nbUnCompClusters++;
+      }
+
+      // The cluster index is its position in clustersList.
+      finished->setClusterIndex(cluster_index_t(clustersList.size()));
+      clustersList.push_back(finished);
+      // Queue it twice: a ClusterTask will close() it, and clusterToWrite receives it as well.
+      taskList.pushToQueue(new ClusterTask(finished));
+      clusterToWrite.pushToQueue(finished);
+
+      // Open an empty cluster of the same kind and return it.
+      slot = new Cluster(compressed ? compression : Compression::None);
+      return slot;
+    }
+
+    void CreatorData::setEntryIndexes()
+    {
+      // Numbers the dirents 0, 1, 2, ... following the iteration order of `dirents`.
+      INFO("set index");
+      entry_index_t nextIdx(0);
+      for (Dirent* entry: dirents) {
+        entry->setIdx(nextIdx);
+        nextIdx += 1;
+      }
+    }
+
+    void CreatorData::resolveRedirectIndexes()
+    {
+      // Points every pending redirect at its target dirent; redirects with a missing target are reported and dropped.
+      INFO("Resolve redirect");
+      for (auto dirent: unresolvedRedirectDirents)
+      {
+        Dirent lookupKey(dirent->getRedirectNs(), dirent->getRedirectPath());  // temporary used only as search key
+        const auto target = dirents.find(&lookupKey);
+        if(target != dirents.end()) {
+          dirent->setRedirect(*target);
+          continue;
+        }
+        INFO("Invalid redirection "
+            << NsAsChar(dirent->getNamespace()) << '/' << dirent->getPath()
+            << " redirecting to (missing) "
+            << NsAsChar(dirent->getRedirectNs()) << '/' << dirent->getRedirectPath());
+        dirents.erase(dirent);
+        dirent->markRemoved();
+        if (dirent == mainPageDirent) {
+          mainPageDirent = nullptr;  // fillHeader() then writes the "no main page" value
+        }
+      }
+    }
+
+    void CreatorData::resolveMimeTypes()
+    {
+      // mimeTypesList is kept sorted, so the indexes handed out by
+      // getMimeTypeIdx() have to be translated to positions in the sorted list.
+      std::vector<std::string> byOldIdx;
+      for (const auto& idxAndName: rmimeTypesMap) {
+        byOldIdx.push_back(idxAndName.second);
+        mimeTypesList.push_back(idxAndName.second);
+      }
+      std::sort(mimeTypesList.begin(), mimeTypesList.end());
+
+      // oldToSorted[i] = position in mimeTypesList of the i-th entry of rmimeTypesMap.
+      std::vector<uint16_t> oldToSorted(byOldIdx.size());
+      for (unsigned oldPos=0; oldPos<byOldIdx.size(); ++oldPos) {
+        for (unsigned sortedPos=0; sortedPos<mimeTypesList.size(); ++sortedPos) {
+          if (byOldIdx[oldPos] == mimeTypesList[sortedPos]) {
+            oldToSorted[oldPos] = static_cast<uint16_t>(sortedPos);
+          }
+        }
+      }
+
+      // Only item dirents carry a mimetype index to rewrite.
+      // NOTE(review): indexing by getMimeType() assumes the map keys are exactly 0..n-1 — confirm.
+      for (auto& dirent: dirents) {
+        if (dirent->isItem()) {
+          dirent->setMimeType(oldToSorted[dirent->getMimeType()]);
+        }
+      }
+    }
+
+    uint16_t CreatorData::getMimeTypeIdx(const std::string& mimeType)
+    {
+      const auto known = mimeTypesMap.find(mimeType);
+      if (known != mimeTypesMap.end()) {  // already registered: reuse its index
+        return known->second;
+      }
+      if (nextMimeIdx >= std::numeric_limits<uint16_t>::max())
+        throw std::runtime_error("too many distinct mime types");
+      const auto assigned = nextMimeIdx;  // new mimetype: register it in both directions
+      mimeTypesMap[mimeType] = assigned;
+      rmimeTypesMap[assigned] = mimeType;
+      ++nextMimeIdx;
+      return assigned;
+    }
+
+    const std::string& CreatorData::getMimeType(uint16_t mimeTypeIdx) const
+    {
+      // Reverse lookup of getMimeTypeIdx(); an unknown index raises std::runtime_error.
+      const auto found = rmimeTypesMap.find(mimeTypeIdx);
+      if (found != rmimeTypesMap.end()) return found->second;
+      throw std::runtime_error("mime type index not found");
+    }
+  }
+}
diff --git a/src/writer/creatordata.h b/src/writer/creatordata.h
new file mode 100644
index 0000000..a4da9ae
--- /dev/null
+++ b/src/writer/creatordata.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2021 Manessh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_CREATOR_DATA_H
+#define ZIM_WRITER_CREATOR_DATA_H
+
+#include <zim/writer/item.h>
+#include "queue.h"
+#include "_dirent.h"
+#include "workers.h"
+#include "handler.h"
+#include <set>
+#include <vector>
+#include <map>
+#include <fstream>
+#include <thread>
+#include "config.h"
+
+#include "../fileheader.h"
+#include "direntPool.h"
+#include "titleListingHandler.h"
+
+namespace zim
+{
+  namespace writer
+  {
+    struct UrlCompare {  // ordering functor for containers of Dirent*; delegates to compareUrl()
+      bool operator() (const Dirent* lhs, const Dirent* rhs) const {
+        return compareUrl(lhs, rhs);
+      }
+    };
+
+    class Cluster;
+    class CreatorData  // Shared state of a ZIM creation: dirents, mime type tables, clusters, threads, stats.
+    {
+      public:
+        typedef std::set<Dirent*, UrlCompare> UrlSortedDirents;
+        typedef std::map<std::string, uint16_t> MimeTypesMap;   // mime type -> index
+        typedef std::map<uint16_t, std::string> RMimeTypesMap;  // index -> mime type
+        typedef std::vector<std::string> MimeTypesList;
+        typedef std::vector<Cluster*> ClusterList;
+        typedef Queue<Cluster*> ClusterQueue;
+        typedef Queue<Task*> TaskQueue;
+        typedef std::vector<std::thread> ThreadList;
+
+        CreatorData(const std::string& fname, bool verbose,
+                       bool withIndex, std::string language,
+                       Compression compression,
+                       size_t clusterSize);
+        virtual ~CreatorData();
+
+        void addDirent(Dirent* dirent);
+        void addItemData(Dirent* dirent, std::unique_ptr<ContentProvider> provider, bool compressContent);
+
+        Dirent* createDirent(NS ns, const std::string& path, const std::string& mimetype, const std::string& title);
+        Dirent* createItemDirent(const Item* item);
+        Dirent* createRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath);
+        Cluster* closeCluster(bool compressed);
+
+        void setEntryIndexes();
+        void resolveRedirectIndexes();
+        void resolveMimeTypes();  // sorts mimeTypesList and remaps the mime type index of item dirents
+
+        uint16_t getMimeTypeIdx(const std::string& mimeType);        // registers the type on first use
+        const std::string& getMimeType(uint16_t mimeTypeIdx) const;  // throws if the index is unknown
+
+        void quitAllThreads();
+
+        DirentPool  pool;
+
+        UrlSortedDirents   dirents;
+        UrlSortedDirents   unresolvedRedirectDirents;
+        Dirent*            mainPageDirent;
+
+        MimeTypesMap mimeTypesMap;
+        RMimeTypesMap rmimeTypesMap;
+        MimeTypesList mimeTypesList;  // filled and sorted by resolveMimeTypes()
+        uint16_t nextMimeIdx = 0;     // next index handed out by getMimeTypeIdx()
+
+        ClusterList clustersList;
+        ClusterQueue clusterToWrite;
+        TaskQueue taskList;
+        ThreadList workerThreads;
+        std::thread  writerThread;
+        const Compression compression;
+        std::string zimName;
+        std::string tmpFileName;
+        bool isEmpty = true;
+        size_t clusterSize;
+        Cluster *compCluster = nullptr;
+        Cluster *uncompCluster = nullptr;
+        int out_fd;
+
+        bool withIndex;
+        std::string indexingLanguage;
+
+        std::shared_ptr<TitleListingHandler> mp_titleListingHandler;
+        offset_t m_titleListBlobOffset;  // Offset of the title list blob,
+                                         // relative to the start of the cluster's data.
+        std::vector<std::shared_ptr<DirentHandler>> m_direntHandlers;
+        void handle(Dirent* dirent, const Hints& hints = Hints()) {  // forwards to every registered handler
+          for(auto& handler: m_direntHandlers) {
+            handler->handle(dirent, hints);
+          }
+        }
+        void handle(Dirent* dirent, std::shared_ptr<Item> item) {  // forwards to every registered handler
+          for(auto& handler: m_direntHandlers) {
+            handler->handle(dirent, item);
+          }
+        }
+
+        // Some stats
+        bool verbose;
+        entry_index_type nbItems;
+        entry_index_type nbRedirectItems;
+        entry_index_type nbCompItems;
+        entry_index_type nbUnCompItems;
+        cluster_index_type nbClusters;
+        cluster_index_type nbCompClusters;
+        cluster_index_type nbUnCompClusters;
+        time_t start_time;
+
+        cluster_index_t clusterCount() const  // number of entries in clustersList
+        { return cluster_index_t(clustersList.size()); }
+
+        entry_index_t itemCount() const  // number of entries in `dirents` (whatever their kind)
+        { return entry_index_t(dirents.size()); }
+    };
+
+  }
+
+}
+
+#endif // ZIM_WRITER_CREATOR_DATA_H
diff --git a/src/writer/defaultIndexData.h b/src/writer/defaultIndexData.h
new file mode 100644
index 0000000..9924c32
--- /dev/null
+++ b/src/writer/defaultIndexData.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_DEFAULTINDEXDATA_H
+#define ZIM_WRITER_DEFAULTINDEXDATA_H
+
+#include <zim/writer/item.h>
+#include "xapian/myhtmlparse.h"
+#include "../tools.h"
+
+#include <atomic>
+#include <mutex>
+#include <sstream>
+
+namespace zim
+{
+  namespace writer
+  {
+    class DefaultIndexData : public IndexData {  // IndexData computed lazily from an item's content provider
+      public:
+        DefaultIndexData(std::unique_ptr<ContentProvider> contentProvider, const std::string& title)
+          : m_initialized(false),
+            mp_contentProvider(std::move(contentProvider)),
+#if defined(ENABLE_XAPIAN)
+            m_title(zim::removeAccents(title)),
+#else
+            m_title(""),
+#endif
+            m_hasIndexData(false),
+            m_content(""),
+            m_keywords(""),
+            m_wordCount(0),
+            m_geoPosition(std::make_tuple(false, 0, 0))
+        {}
+
+        void initialize() const {  // runs the parsing at most once; called by the getters below
+          if (m_initialized) {
+            return;
+          }
+          std::lock_guard<std::mutex> lock(m_initLock);
+          // Check again under the lock so that two concurrent calls on an
+          // uninitialized object cannot both run the initialization.
+          if (m_initialized) {
+            return;
+          }
+#if defined(ENABLE_XAPIAN)
+          std::ostringstream ss;
+          while (true) {  // drain the provider until it returns an empty blob
+            auto blob = mp_contentProvider->feed();
+            if(blob.size() == 0) {
+              break;
+            }
+            ss << blob;
+          }
+          MyHtmlParser htmlParser;
+          try {
+            htmlParser.parse_html(ss.str(), "UTF-8", true);
+          } catch(...) {}  // any parser exception is ignored; the fields below use what htmlParser holds
+          m_hasIndexData = !htmlParser.dump.empty() && htmlParser.indexing_allowed && (htmlParser.dump.find("NOINDEX") == std::string::npos);
+          m_content = zim::removeAccents(htmlParser.dump);
+          m_keywords = zim::removeAccents(htmlParser.keywords);
+          m_wordCount = zim::countWords(htmlParser.dump);
+          if(htmlParser.has_geoPosition) {
+            m_geoPosition = std::make_tuple(true, htmlParser.latitude, htmlParser.longitude);
+          }
+#endif
+          m_initialized = true;
+        }
+
+        bool hasIndexData() const {
+          initialize();
+          return m_hasIndexData;
+        }
+
+        std::string getTitle() const {  // set in the constructor, so no initialize() needed
+          return m_title;
+         }
+
+        std::string getContent() const {
+          initialize();
+          return m_content;
+        }
+
+        std::string getKeywords() const {
+          initialize();
+          return m_keywords;
+        }
+
+        uint32_t getWordCount() const {
+          initialize();
+          return m_wordCount;
+        }
+
+        GeoPosition getGeoPosition() const
+        {
+          initialize();
+          return m_geoPosition;
+        }
+
+      private:
+        mutable std::atomic<bool> m_initialized;  // set once initialize() has completed
+        mutable std::mutex m_initLock;            // serializes initialize()
+        std::unique_ptr<ContentProvider> mp_contentProvider;
+        std::string m_title;
+        mutable bool m_hasIndexData;
+        mutable std::string m_content;
+        mutable std::string m_keywords;
+        mutable uint32_t m_wordCount;
+        mutable GeoPosition m_geoPosition;
+    };
+  }
+}
+
+#endif // ZIM_WRITER_DEFAULTINDEXDATA_H
diff --git a/src/writer/dirent.cpp b/src/writer/dirent.cpp
new file mode 100644
index 0000000..f5df2ec
--- /dev/null
+++ b/src/writer/dirent.cpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2006 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "_dirent.h"
+#include <zim/zim.h>
+#include "buffer.h"
+#include "endian_tools.h"
+#include "log.h"
+#include <algorithm>
+#include <cstring>
+#ifdef _WIN32
+# include <io.h>
+#else
+# include <unistd.h>
+# define _write(fd, addr, size) if(::write((fd), (addr), (size)) != (ssize_t)(size)) \
+{throw std::runtime_error("Error writing");}
+#endif
+
+log_define("zim.dirent")
+
+namespace zim {
+namespace writer {
+
+char NsAsChar(NS ns) {  // letter naming a namespace; throws std::runtime_error for any other value
+  switch(ns) {
+    case NS::C: return 'C';
+    case NS::M: return 'M';
+    case NS::W: return 'W';
+    case NS::X: return 'X';
+  }
+  throw std::runtime_error("Invalid namespace value.");  // reached only when no case above matched
+}
+
+// Constructor for a "classic" dirent: stores the given mime type index and direct-content info.
+Dirent::Dirent(NS ns, const std::string& path, const std::string& title, uint16_t mimetype)
+  : pathTitle(path, title),  // path and title are stored together in one string
+    mimeType(mimetype),
+    idx(0),
+    info(DirentInfo::Direct()),
+    offset(0),
+    _ns(static_cast<uint8_t>(ns)),
+    removed(false),
+    frontArticle(false)
+{}
+
+// Constructor for a "redirection" dirent: mime type is `redirectMimeType`, the target is kept as (namespace, path).
+Dirent::Dirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath)
+  : pathTitle(path, title),
+    mimeType(redirectMimeType),
+    idx(0),
+    info(DirentInfo::Redirect(targetNs, targetPath)),  // already a temporary: std::move() added nothing here
+    offset(0),
+    _ns(static_cast<uint8_t>(ns)),
+    removed(false),
+    frontArticle(false)
+{}
+
+NS Dirent::getRedirectNs() const {
+  return info.getRedirect().ns;  // namespace stored in the redirect info
+}
+
+std::string Dirent::getRedirectPath() const {
+  return info.getRedirect().targetPath;  // converted to (and returned as) a std::string copy
+}
+
+// Write this dirent to `out_fd`: a little-endian header of 12 bytes (redirect) or
+// 16 bytes (item), then the pathTitle bytes and one terminating zero byte.
+void Dirent::write(int out_fd) const
+{
+  const static char zero = 0;
+  union
+  {
+    char d[16];
+    long a;
+  } header;
+  zim::toLittleEndian(getMimeType(), header.d);
+  header.d[2] = 0; // parameter size
+  header.d[3] = NsAsChar(getNamespace());
+  // Use this object's own accessors: no `dirent` variable exists in this scope.
+  log_debug("title=" << getTitle() << " title.size()=" << getTitle().size());
+  zim::toLittleEndian(getVersion(), header.d + 4);
+
+  if (isRedirect())
+  {
+    zim::toLittleEndian(getRedirectIndex().v, header.d + 8);
+    _write(out_fd, header.d, 12);
+  }
+  else
+  {
+    zim::toLittleEndian(zim::cluster_index_type(getClusterNumber()), header.d + 8);
+    zim::toLittleEndian(zim::blob_index_type(getBlobNumber()), header.d + 12);
+    _write(out_fd, header.d, 16);
+  }
+  _write(out_fd, pathTitle.data(), pathTitle.size());
+  _write(out_fd, &zero, 1);
+}
+
+}
+}
diff --git a/src/writer/direntPool.h b/src/writer/direntPool.h
new file mode 100644
index 0000000..227fbb3
--- /dev/null
+++ b/src/writer/direntPool.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2019-2021 Matthieu Gautier
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_DIRENTPOOL_H
+#define ZIM_WRITER_DIRENTPOOL_H
+
+#include "debug.h"
+#include "_dirent.h"
+
+namespace zim
+{
+  namespace writer {
+    class DirentPool {  // Builds Dirent objects by placement-new inside raw blocks of 0xFFFF slots.
+      private:
+        std::vector<Dirent*> pools;  // raw blocks, released by destroyPoolBlock()
+        uint16_t direntIndex;        // slots constructed in pools.back(); 0xFFFF forces a new block
+
+        void allocate_new_pool() {
+          pools.push_back(reinterpret_cast<Dirent*>(new char[sizeof(Dirent)*0xFFFF]));
+          direntIndex = 0;
+        }
+        // Destroy the first `count` dirents of `pool`, then free the raw block.
+        static void destroyPoolBlock(Dirent* pool, uint16_t count=0xFFFF) {
+          for (auto i = 0U; i < count; i++) {
+            try {
+              pool[i].~Dirent();
+            } catch (...){ /*discard*/ }
+          }
+          delete [] (reinterpret_cast<char*>(pool));
+        }
+
+      public:
+        DirentPool() :
+          direntIndex(0xFFFF)
+        {}
+        DirentPool(const DirentPool&) = delete;
+        DirentPool& operator=(const DirentPool&) = delete;
+        ~DirentPool() {
+          auto nbPools = pools.size();
+          if (nbPools == 0) {
+            return;
+          }
+          // Delete all but the last pool (and call the destructors of their dirents)
+          for (auto i = 0U; i<nbPools-1; i++) {
+            destroyPoolBlock(pools[i]);
+          }
+          // In the last pool, only the first `direntIndex` dirents are really constructed.
+          destroyPoolBlock(pools[nbPools-1], direntIndex);
+        }
+
+        Dirent* getClassicDirent(NS ns, const std::string& path, const std::string& title, uint16_t mimetype) {
+          if (direntIndex == 0xFFFF) {
+            allocate_new_pool();
+          }
+          auto dirent = new (pools.back() + direntIndex) Dirent(ns, path, title, mimetype);
+          ++direntIndex;  // counted only once constructed: a throwing ctor must not leave a slot for ~DirentPool
+          return dirent;
+        }
+
+        Dirent* getRedirectDirent(NS ns, const std::string& path, const std::string& title, NS targetNs, const std::string& targetPath) {
+          if (direntIndex == 0xFFFF) {
+            allocate_new_pool();
+          }
+          auto dirent = new (pools.back() + direntIndex) Dirent(ns, path, title, targetNs, targetPath);
+          ++direntIndex;  // counted only once constructed: a throwing ctor must not leave a slot for ~DirentPool
+          return dirent;
+        }
+    };
+  }
+}
+
+#endif // ZIM_WRITER_DIRENTPOOL_H
+
diff --git a/src/writer/handler.h b/src/writer/handler.h
new file mode 100644
index 0000000..b38b130
--- /dev/null
+++ b/src/writer/handler.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_WRITER_HANDLER_H
+#define OPENZIM_LIBZIM_WRITER_HANDLER_H
+
+#include <string>
+#include <memory>
+#include <vector>
+
+#include <zim/writer/item.h>
+
+namespace zim {
+namespace writer {
+
+class CreatorData;
+class ContentProvider;
+class Dirent;
+
+/**
+ * DirentHandler is used to add "extra" handling on dirent/item.
+ *
+ * The main purpose of the handler is to "see" all dirents corresponding to user entries
+ * and generate its own dirent/item.
+ *
+ * Classical use cases are :
+ *  - Generating an index of the item (xapianIndex)
+ *  - Generating a listing of the item (all item or "main" entries only)
+ *  - Count mimetypes
+ *  - ...
+ *
+ * The workflow is the following:
+ * - Start the handler with `start()`.
+ * - Pass dirents to handle using `handle()`.
+ *   If a handler has to handle itself, it has to do it itself before (in start/stop, ...)
+ *   The handlers will NOT have dirents of other handlers passed.
+ *   (Exception made for titleListingHandle)
+ * - Get the dirents associated to the handler using `createDirents()`.
+ *   The handler must create dirents if the entry/entries associated to it must be created.
+ *   It may create several dirents if several entries must be created.
+ *   It may return an empty vector (no dirent) if no entry must be created (empty listing,...).
+ * - All dirents are correctly set (redirect resolved, index and mimetype set, ...)
+ * - Stop the handler with `stop()`.
+ * - The content of the handler is retrieved using `getContentProviders`.
+ *   The handler MUST return as many contentProviders as the number of dirents it has returned.
+ *
+ *  While it may seem that DirentHandler is dynamically (de)activated by the user, it is not.
+ *  This is purely an internal structure to simplify the internal architecture of the writer.
+ */
+class DirentHandler {  // Abstract hook that is shown dirents/items and may contribute dirents of its own.
+  public:
+    explicit DirentHandler(CreatorData* data);
+    virtual ~DirentHandler() = default;
+    using ContentProviders = std::vector<std::unique_ptr<ContentProvider>>;
+    using Dirents = std::vector<Dirent*>;
+
+    virtual void start() = 0;
+    virtual void stop() = 0;
+    virtual bool isCompressible() = 0;
+    const Dirents& getDirents() {  // calls createDirents() on the first call only, then returns the cached result
+      if (!m_direntsCreated) {
+        m_dirents = createDirents();
+        m_direntsCreated = true;
+      }
+      return m_dirents;
+    }
+    virtual ContentProviders getContentProviders() const = 0;
+
+    /*
+     * Handle a dirent/item.
+     *
+     * item may be nullptr (dirent is a redirect or in special case)
+     */
+    virtual void handle(Dirent* dirent, std::shared_ptr<Item> item) = 0;
+    virtual void handle(Dirent* dirent, const Hints& hints) = 0;
+
+  protected:
+    virtual Dirents createDirents() const = 0;  // invoked through getDirents()
+    DirentHandler() = default;
+
+  private:
+    Dirents m_dirents;              // cache filled by getDirents()
+    bool m_direntsCreated {false};  // true once m_dirents has been filled
+};
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_WRITER_HANDLER_H
diff --git a/src/writer/item.cpp b/src/writer/item.cpp
new file mode 100644
index 0000000..3ba745f
--- /dev/null
+++ b/src/writer/item.cpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/writer/item.h>
+#include <zim/writer/contentProvider.h>
+#include "defaultIndexData.h"
+
+namespace zim
+{
+  namespace writer
+  {
+    // Default index data: only built for items whose mime type starts with "text/html".
+    std::shared_ptr<IndexData> Item::getIndexData() const
+    {
+      const bool isHtml = (getMimeType().find("text/html") == 0);
+      if (!isHtml) return nullptr;
+
+      auto contentProvider = getContentProvider();
+      return std::make_shared<DefaultIndexData>(std::move(contentProvider), getTitle());
+    }
+
+    Hints Item::getHints() const {
+      return Hints();  // default: no hints; getAmendedHints() fills in the missing ones
+    }
+
+    Hints Item::getAmendedHints() const {
+      Hints amended = getHints();
+
+      // If no FRONT_ARTICLE hint is given, determine it from the mimetype.
+      if (amended.count(FRONT_ARTICLE) == 0) {
+        amended[FRONT_ARTICLE] = (getMimeType().find("text/html") == 0);
+      }
+
+      // If no COMPRESS hint is given, determine it from the mimetype.
+      if (amended.count(COMPRESS) == 0) {
+        amended[COMPRESS] = isCompressibleMimetype(getMimeType());
+      }
+      return amended;
+    }
+
+    std::unique_ptr<ContentProvider> StringItem::getContentProvider() const
+    { // The shared_ptr below points at `content` while sharing ownership of this item.
+      std::shared_ptr<const std::string> contentRef(shared_from_this(), &content);
+      return std::unique_ptr<ContentProvider>(new SharedStringProvider(contentRef));
+    }
+
+    std::unique_ptr<ContentProvider> FileItem::getContentProvider() const
+    {
+      return std::unique_ptr<ContentProvider>(new FileProvider(filepath));  // a fresh provider on every call
+    }
+
+
+  }
+}
diff --git a/src/writer/queue.h b/src/writer/queue.h
new file mode 100644
index 0000000..454087e
--- /dev/null
+++ b/src/writer/queue.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2016-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_QUEUE_H
+#define OPENZIM_LIBZIM_QUEUE_H
+
+#define MAX_QUEUE_SIZE 10
+
+#include <mutex>
+#include <queue>
+#include "../tools.h"
+
+template<typename T>
+class Queue {
+    public:
+        Queue() = default;
+        virtual ~Queue() = default;
+        // Non copyable: a copy attempt is now rejected at compile time rather than at link time.
+        Queue(const Queue&) = delete;
+        Queue& operator=(const Queue&) = delete;
+
+        // Every operation below takes m_queueMutex for the duration of its queue access.
+        virtual bool isEmpty();
+        virtual size_t size();
+        virtual void pushToQueue(const T& element);  // waits while more than MAX_QUEUE_SIZE elements are queued
+        virtual bool getHead(T &element);            // copies the head without removing it; false when empty
+        virtual bool popFromQueue(T &element);       // removes the head into `element`; false when empty
+
+    protected:
+        std::queue<T>   m_realQueue;
+        std::mutex      m_queueMutex;
+};
+
+template<typename T>
+bool Queue<T>::isEmpty() {
+    std::lock_guard<std::mutex> l(m_queueMutex);  // hold the mutex while reading the queue
+    return m_realQueue.empty();
+}
+
+template<typename T>
+size_t Queue<T>::size() {
+    std::lock_guard<std::mutex> l(m_queueMutex);  // hold the mutex while reading the queue
+    return m_realQueue.size();
+}
+
+template<typename T>
+void Queue<T>::pushToQueue(const T &element) {
+    // Wait (zim::microsleep with a delay growing by 10 per round) while the queue holds more than MAX_QUEUE_SIZE.
+    for (unsigned int delay = 0; ; delay += 10) {
+        zim::microsleep(delay);
+        if (size() <= MAX_QUEUE_SIZE) {
+            break;
+        }
+    }
+
+    // The size check above and this push take the mutex separately.
+    std::lock_guard<std::mutex> guard(m_queueMutex);
+    m_realQueue.push(element);
+}
+
+// Copy the oldest element into `element` without removing it; returns false when the queue is empty.
+template<typename T>
+bool Queue<T>::getHead(T &element) {
+    std::lock_guard<std::mutex> guard(m_queueMutex);
+    const bool hasElement = !m_realQueue.empty();
+    if (hasElement)
+        element = m_realQueue.front();
+    return hasElement;
+}
+
+// Move the oldest element out of the queue into `element`.
+// Returns false (leaving `element` untouched) when the queue is empty.
+template<typename T>
+bool Queue<T>::popFromQueue(T &element) {
+    std::lock_guard<std::mutex> guard(m_queueMutex);
+    const bool hasElement = !m_realQueue.empty();
+    if (hasElement) {
+        element = m_realQueue.front();
+        m_realQueue.pop();
+    }
+    return hasElement;
+}
+
+#endif // OPENZIM_LIBZIM_QUEUE_H
diff --git a/src/writer/tinyString.h b/src/writer/tinyString.h
new file mode 100644
index 0000000..bb8bde9
--- /dev/null
+++ b/src/writer/tinyString.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@mgautier.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_WRITER_TINYSTRING_H
+#define ZIM_WRITER_TINYSTRING_H
+
+#include "../zim_types.h"
+#include <cstring>
+
+namespace zim
+{
+  namespace writer {
+    class TinyString {  // Owning, move-only byte string whose length is stored in 16 bits.
+      public: // functions
+        TinyString() :
+          m_data(nullptr),
+          m_size(0)
+        {}
+        // Copies the bytes of `s`. Throws std::runtime_error when s.size() >= 0xFFFF;
+        // the length is validated before allocating, so a rejected string leaks nothing.
+        TinyString(const std::string& s) :
+          m_data(new char[checkedSize(s)]),
+          m_size(static_cast<uint16_t>(s.size()))
+        {
+          std::memcpy(m_data, s.data(), m_size);
+        }
+        TinyString(TinyString&& t):  // takes over the buffer and leaves `t` empty
+          m_data(t.m_data),
+          m_size(t.m_size)
+        {
+          t.m_data = nullptr;
+          t.m_size = 0;
+        }
+        TinyString(const TinyString& t) = delete;
+        ~TinyString() {
+          delete[] m_data;  // deleting a null pointer is a no-op
+        }
+        operator std::string() const { return std::string(m_data, m_size); }
+        bool empty() const { return m_size == 0; }
+        size_t size() const { return m_size; }
+        const char* data() const { return m_data; }  // exactly size() bytes; no terminator is appended
+        bool operator==(const TinyString& other) const {
+          return (m_size == other.m_size) && (std::memcmp(m_data, other.m_data, m_size) == 0);
+        }
+        // Byte-wise comparison; when one string is a prefix of the other, the shorter is smaller.
+        bool operator<(const TinyString& other) const {
+          auto min_size = std::min(m_size, other.m_size);
+          auto ret = std::memcmp(m_data, other.m_data, min_size);
+          if (ret == 0) {
+            return m_size < other.m_size;
+          } else {
+            return ret < 0;
+          }
+        }
+      protected: // helpers and members
+        static uint16_t checkedSize(const std::string& s) {  // length of `s`, or throws if it does not fit
+          if (s.size() >= 0xFFFF) throw std::runtime_error("String len is too big");
+          return static_cast<uint16_t>(s.size());
+        }
+        char* m_data;
+        uint16_t m_size;
+    } PACKED;
+
+    // Stores "<path>\0<title>" in one TinyString; the title part is left out when it equals the path.
+    class PathTitleTinyString : public TinyString {
+      public:
+        PathTitleTinyString() : TinyString() {}
+        PathTitleTinyString(const std::string& path, const std::string& title)
+          : TinyString(PathTitleTinyString::concat(path, title))
+        {}
+
+        // Builds the stored form: the path, its terminating NUL, then the title if it differs.
+        static std::string concat(const std::string& path, const std::string& title) {
+          std::string packed(path.data(), path.size()+1);
+          if ( title != path ) {
+            packed += title;
+          }
+          return packed;
+        }
+
+        std::string getPath() const {
+          if (m_size == 0) return std::string();
+          return std::string(m_data);
+        }
+
+        // Returns the stored title. When none is stored, returns an empty string
+        // if `storedOnly` is set and the path otherwise.
+        std::string getTitle(bool storedOnly) const {
+          if (m_size == 0) {
+            return std::string();
+          }
+          const auto pathLen = std::strlen(m_data);
+          const auto titleStart = pathLen + 1;
+          if (titleStart != m_size) {
+            return std::string(m_data+titleStart, m_size-titleStart);
+          }
+          return storedOnly ? std::string() : std::string(m_data);
+        }
+    } PACKED;
+  }
+}
+
+#endif // ZIM_WRITER_TINYSTRING_H
+
diff --git a/src/writer/titleListingHandler.cpp b/src/writer/titleListingHandler.cpp
new file mode 100644
index 0000000..b6507db
--- /dev/null
+++ b/src/writer/titleListingHandler.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "titleListingHandler.h"
+#include "creatordata.h"
+
+#include "../endian_tools.h"
+
+#include <zim/writer/contentProvider.h>
+#include <zim/blob.h>
+
+using namespace zim::writer;
+
+namespace {
+
+class ListingProvider : public ContentProvider {
+  public:
+    ListingProvider(const TitleListingHandler::Dirents* dirents, bool frontOnly)
+      : mp_dirents(dirents),
+        m_it(dirents->begin()),
+        m_frontOnly(frontOnly)
+    {}
+
+    // Total listing size in bytes: one entry_index_type per listed dirent.
+    zim::size_type getSize() const override {
+      size_t nbEntries = mp_dirents->size();
+      if (m_frontOnly) {
+        nbEntries = std::count_if(mp_dirents->begin(), mp_dirents->end(), [](Dirent* d) { return d->isFrontArticle();});
+      }
+      return nbEntries * sizeof(zim::entry_index_type);
+    }
+
+    // Yields the little-endian index of the next listed dirent, one per call, then
+    // an empty blob. The blob is built over `buffer`, which the next call overwrites.
+    zim::Blob feed() override {
+      if (m_frontOnly) {
+        m_it = std::find_if(m_it, mp_dirents->end(), [](Dirent* d) { return d->isFrontArticle(); });
+      }
+      if (m_it == mp_dirents->end()) {
+        return zim::Blob(nullptr, 0);
+      }
+      zim::toLittleEndian((*m_it)->getIdx().v, buffer);
+      ++m_it;
+      return zim::Blob(buffer, sizeof(zim::entry_index_type));
+    }
+
+  private:
+    const TitleListingHandler::Dirents* mp_dirents;
+    char buffer[sizeof(zim::entry_index_type)];
+    TitleListingHandler::Dirents::const_iterator m_it;
+    bool m_frontOnly;
+};
+
+} // end of anonymous namespace
+
+TitleListingHandler::TitleListingHandler(CreatorData* data)
+  : mp_creatorData(data),       // kept to create the listing dirents later
+    m_hasFrontArticles(false)   // set by handle() when a front article is seen
+{}
+
+TitleListingHandler::~TitleListingHandler() = default;
+
+void TitleListingHandler::start() {  // intentionally empty
+}
+
+void TitleListingHandler::stop() {  // drop removed dirents, then sort the rest with TitleCompare
+  const auto isRemoved = [](const Dirent* d) { return d->isRemoved(); };
+  auto firstRemoved = std::remove_if(m_handledDirents.begin(), m_handledDirents.end(), isRemoved);
+  m_handledDirents.erase(firstRemoved, m_handledDirents.end());
+  std::sort(m_handledDirents.begin(), m_handledDirents.end(), TitleCompare());
+}
+
+DirentHandler::Dirents TitleListingHandler::createDirents() const {
+  // One dirent for the v0 listing, plus one for v1 when front articles were seen.
+  const char* const listingMime = "application/octet-stream+zimlisting";
+  Dirents created{mp_creatorData->createDirent(NS::X, "listing/titleOrdered/v0", listingMime, "")};
+  if (m_hasFrontArticles)
+    created.push_back(mp_creatorData->createDirent(NS::X, "listing/titleOrdered/v1", listingMime, ""));
+  return created;
+}
+
+DirentHandler::ContentProviders TitleListingHandler::getContentProviders() const {
+  // Same order as createDirents(): the all-entries listing first, then the front-articles-only one.
+  const int nbListings = m_hasFrontArticles ? 2 : 1;
+  ContentProviders providers;
+  for (int i = 0; i < nbListings; ++i)
+    providers.push_back(std::unique_ptr<ContentProvider>(new ListingProvider(&m_handledDirents, i == 1)));
+  return providers;
+}
+
+void TitleListingHandler::handle(Dirent* dirent, std::shared_ptr<Item> item)
+{ // `item` may be null (allowed by the DirentHandler::handle contract): then no hints apply.
+  handle(dirent, item ? item->getAmendedHints() : Hints());
+}
+
+// Record `dirent` for the title listings and flag it as a front article when
+// it lives in the `C` namespace and its FRONT_ARTICLE hint is set.
+void TitleListingHandler::handle(Dirent* dirent, const Hints& hints)
+{
+  m_handledDirents.push_back(dirent);
+  // By definition, dirents outside the `C` namespace are never front articles.
+  if (dirent->getNamespace() != NS::C) {
+    return;
+  }
+  // A missing hint means "not a front article"; look it up without exceptions.
+  const auto hint = hints.find(FRONT_ARTICLE);
+  if (hint != hints.end() && bool(hint->second)) {
+    m_hasFrontArticles = true;
+    dirent->setFrontArticle();
+  }
+}
+
diff --git a/src/writer/titleListingHandler.h b/src/writer/titleListingHandler.h
new file mode 100644
index 0000000..65f19d5
--- /dev/null
+++ b/src/writer/titleListingHandler.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_LISTING_HANDLER_H
+#define OPENZIM_LIBZIM_LISTING_HANDLER_H
+
+#include "handler.h"
+#include "_dirent.h"
+
+#include <vector>
+
+namespace zim {
+namespace writer {
+
+// Ordering functor for Dirent pointers: delegates to compareTitle().
+struct TitleCompare {
+  bool operator() (const Dirent* lhs, const Dirent* rhs) const
+  { return compareTitle(lhs, rhs); }
+};
+
+// DirentHandler in charge of the title listings.
+// It creates the "classic" V0 title listing (for ALL entries) and, when at least
+// one front article has been handled, the V1 title listing (front articles only).
+class TitleListingHandler : public DirentHandler {
+  public:
+    explicit TitleListingHandler(CreatorData* data);
+    virtual ~TitleListingHandler();
+
+    void start() override;  // nothing to prepare
+    void stop() override;  // drops removed dirents, then sorts the rest with TitleCompare
+    bool isCompressible() override { return false; }  // always reports false
+    ContentProviders getContentProviders() const override;  // one provider per listing dirent, in the same order
+    void handle(Dirent* dirent, std::shared_ptr<Item> item) override;  // forwards to the Hints overload
+    void handle(Dirent* dirent, const Hints& hints) override;  // records the dirent; flags `C` front articles
+
+  protected:
+    Dirents createDirents() const override;  // v0 dirent, plus v1 when front articles exist
+    CreatorData* mp_creatorData;  // used to create the listing dirents; not deleted by this class
+    Dirents m_handledDirents;  // every dirent passed to handle(); filtered and sorted by stop()
+    bool m_hasFrontArticles;  // set once a front article has been handled
+};
+}
+}
+
+#endif // OPENZIM_LIBZIM_LISTING_HANDLER_H
diff --git a/src/writer/workers.cpp b/src/writer/workers.cpp
new file mode 100644
index 0000000..d48418f
--- /dev/null
+++ b/src/writer/workers.cpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2019-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "workers.h"
+#include "cluster.h"
+#include "creatordata.h"
+
+#include "../tools.h"
+
+#ifdef _WIN32
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+namespace zim
+{
+  namespace writer
+  {
+
+    // Worker thread entry point. `arg` is cast to the CreatorData whose taskList is polled:
+    // each popped task is run and deleted; popping a null task ends the thread.
+    void* taskRunner(void* arg) {
+      auto* const creator = static_cast<zim::writer::CreatorData*>(arg);
+      Task* current;
+      unsigned int backoff = 0;  // sleep argument: +100 per poll, back to 0 after each task
+      for (;;) {
+        microsleep(backoff);
+        backoff += 100;
+        if (!creator->taskList.popFromQueue(current)) continue;  // nothing popped: poll again
+        if (current == nullptr) {
+          return nullptr;
+        }
+        current->run(creator);
+        delete current;
+        backoff = 0;
+      }
+      return nullptr;
+    }
+
+    // Writer thread entry point: repeatedly looks at the head of creatorData->clusterToWrite and,
+    // once that cluster is closed, pops it, records the current file position of out_fd as its
+    // offset, writes it and clears its data. Exits when the head is a null cluster.
+    void* clusterWriter(void* arg) {
+      auto* const creator = static_cast<zim::writer::CreatorData*>(arg);
+      Cluster* head;
+      unsigned int backoff = 0;  // sleep argument: +100 per poll, back to 0 after each write
+      for (;;) {
+        microsleep(backoff);
+        backoff += 100;
+        if (!creator->clusterToWrite.getHead(head)) continue;  // no head available: poll again
+        if (head == nullptr) {
+          return nullptr;  // all clusters written, we can quit
+        }
+        if (!head->isClosed()) continue;  // head is not closed yet: poll again
+        creator->clusterToWrite.popFromQueue(head);
+        const auto position = lseek(creator->out_fd, 0, SEEK_CUR);
+        head->setOffset(offset_t(position));
+        head->write(creator->out_fd);
+        head->clear_data();
+        backoff = 0;
+      }
+      return nullptr;
+    }
+  }
+}
diff --git a/src/writer/workers.h b/src/writer/workers.h
new file mode 100644
index 0000000..2e9d68c
--- /dev/null
+++ b/src/writer/workers.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2019-2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_WORKERS_H
+#define OPENZIM_LIBZIM_WORKERS_H
+
+namespace zim {
+namespace writer {
+
+class CreatorData;
+
+class Task {  // Abstract unit of work; concrete tasks implement run().
+  public:
+    Task() = default;
+    virtual ~Task() = default;  // virtual so that a task can be deleted through a Task*
+
+    virtual void run(CreatorData* data) = 0;  // performs the work; `data` is supplied by the caller
+};
+
+void* taskRunner(void* data);
+void* clusterWriter(void* data);
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_WORKERS_H
diff --git a/src/writer/xapianHandler.cpp b/src/writer/xapianHandler.cpp
new file mode 100644
index 0000000..407ccbb
--- /dev/null
+++ b/src/writer/xapianHandler.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "xapianHandler.h"
+#include "xapianIndexer.h"
+#include "xapianWorker.h"
+#include "creatordata.h"
+
+#include <zim/writer/contentProvider.h>
+
+using namespace zim::writer;
+
+// Always builds the title indexer ("<zimName>_title.idx"); the fulltext one ("<zimName>_fulltext.idx") only on request.
+XapianHandler::XapianHandler(CreatorData* data, bool withFulltextIndex)
+  : mp_fulltextIndexer(withFulltextIndex ? new XapianIndexer(data->zimName+"_fulltext.idx", data->indexingLanguage, IndexingMode::FULL, true) : nullptr),
+    mp_titleIndexer(new XapianIndexer(data->zimName+"_title.idx", data->indexingLanguage, IndexingMode::TITLE, true)),
+    mp_creatorData(data) {}
+
+XapianHandler::~XapianHandler() = default;  // defined here, after xapianIndexer.h is included, so the unique_ptr<XapianIndexer> members see a complete type
+
+// Runs indexingPrelude() on each indexer in use: fulltext (if any) first, then title.
+void XapianHandler::start() {
+  if (mp_fulltextIndexer != nullptr)
+    mp_fulltextIndexer->indexingPrelude();
+  mp_titleIndexer->indexingPrelude();
+}
+
+// Runs indexingPostlude() on each indexer in use: fulltext (if any) first, then title.
+void XapianHandler::stop() {
+  if (mp_fulltextIndexer != nullptr) {
+    // All indexing tasks must be done before the xapian database is closed.
+    IndexTask::waitNoMoreTask();
+    mp_fulltextIndexer->indexingPostlude();
+  }
+  mp_titleIndexer->indexingPostlude();
+}
+
+// Creates one X-namespace dirent per non-empty index: "fulltext/xapian" first, then "title/xapian".
+DirentHandler::Dirents XapianHandler::createDirents() const {
+  const char* const xapianMime = "application/octet-stream+xapian";
+  Dirents created;
+  if (mp_fulltextIndexer != nullptr) {
+    // Wait for all indexing tasks to be done before checking whether the index is empty.
+    IndexTask::waitNoMoreTask();
+    if (!mp_fulltextIndexer->is_empty())
+      created.push_back(mp_creatorData->createDirent(NS::X, "fulltext/xapian", xapianMime, ""));
+  }
+  if (!mp_titleIndexer->is_empty())
+    created.push_back(mp_creatorData->createDirent(NS::X, "title/xapian", xapianMime, ""));
+  return created;
+}
+
+// Supplies a FileProvider on the index path of each non-empty index: fulltext first, then title.
+DirentHandler::ContentProviders XapianHandler::getContentProviders() const {
+  using ProviderPtr = std::unique_ptr<ContentProvider>;
+  ContentProviders providers;
+  if (mp_fulltextIndexer && !mp_fulltextIndexer->is_empty())
+    providers.push_back(ProviderPtr(new FileProvider(mp_fulltextIndexer->getIndexPath())));
+  if (!mp_titleIndexer->is_empty())
+    providers.push_back(ProviderPtr(new FileProvider(mp_titleIndexer->getIndexPath())));
+  return providers;
+}
+
+// Adds the dirent to the title index unless its real title is empty; for a redirect the
+// redirect path is passed along as the target path.
+void XapianHandler::indexTitle(Dirent* dirent) {
+  const auto realTitle = dirent->getRealTitle();
+  if (realTitle.empty())
+    return;
+  const auto entryPath = dirent->getPath();
+  if (!dirent->isRedirect()) {
+    mp_titleIndexer->indexTitle(entryPath, realTitle);
+    return;
+  }
+  mp_titleIndexer->indexTitle(entryPath, realTitle, dirent->getRedirectPath());
+}
+
+// Title-indexes the dirent when it is in the `C` namespace and its hints hold a truthy FRONT_ARTICLE value.
+void XapianHandler::handle(Dirent* dirent, const Hints& hints)
+{
+  const bool isContent = !(dirent->getNamespace() != NS::C);
+  try {
+    if (isContent && bool(hints.at(FRONT_ARTICLE))) {
+      indexTitle(dirent);
+    }
+  } catch(std::out_of_range&) {
+    // Typically no FRONT_ARTICLE entry in the hints: nothing to index.
+  }
+}
+
+// Item overload: title-indexes the dirent through the Hints overload and, when a fulltext
+// indexer exists and the item provides index data, queues an IndexTask for it.
+void XapianHandler::handle(Dirent* dirent, std::shared_ptr<Item> item)
+{
+  if (dirent->getNamespace() != NS::C) {
+    return;  // only `C` namespace entries are indexed
+  }
+  // Title index.
+  handle(dirent, item->getAmendedHints());
+
+  // Fulltext index.
+  if (!mp_fulltextIndexer) {
+    return;
+  }
+  const auto itemIndexData = item->getIndexData();
+  if (itemIndexData) {
+    mp_creatorData->taskList.pushToQueue(new IndexTask(itemIndexData, dirent->getPath(), mp_fulltextIndexer.get()));
+  }
+}
+
diff --git a/src/writer/xapianHandler.h b/src/writer/xapianHandler.h
new file mode 100644
index 0000000..86a6773
--- /dev/null
+++ b/src/writer/xapianHandler.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_XAPIAN_HANDLER_H
+#define OPENZIM_LIBZIM_XAPIAN_HANDLER_H
+
+#include "handler.h"
+
+namespace zim {
+namespace writer {
+
+class XapianIndexer;
+
+class XapianHandler : public DirentHandler {  // DirentHandler owning the xapian title indexer and an optional fulltext indexer
+  public:
+    XapianHandler(CreatorData* data, bool withFullTextIndex);  // withFullTextIndex: also create the fulltext indexer
+    virtual ~XapianHandler();
+
+    void start() override;  // runs the indexing prelude of each indexer
+    void stop() override;  // runs the indexing postlude of each indexer
+    bool isCompressible() override { return false; }  // always reports false
+    ContentProviders getContentProviders() const override;  // one file provider per non-empty index
+    void handle(Dirent* dirent, std::shared_ptr<Item> item) override;  // title index + queued fulltext indexing
+    void handle(Dirent* dirent, const Hints& hints) override;  // title index only
+
+  protected:
+    Dirents createDirents() const override;  // one dirent per non-empty index
+
+  private: // methods
+    void indexTitle(Dirent* dirent);  // adds the dirent to the title index if it has a title
+
+  private: // data
+    std::unique_ptr<XapianIndexer> mp_fulltextIndexer;  // null when no fulltext index was requested
+    std::unique_ptr<XapianIndexer> mp_titleIndexer;  // always created
+    CreatorData* mp_creatorData;  // not owned: never deleted by this class
+};
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_XAPIAN_HANDLER_H
diff --git a/src/writer/xapianIndexer.cpp b/src/writer/xapianIndexer.cpp
new file mode 100644
index 0000000..ac42681
--- /dev/null
+++ b/src/writer/xapianIndexer.cpp
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2011 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "xapianIndexer.h"
+#include "libzim-resources.h"
+#include "fs.h"
+#include "tools.h"
+#include "../constants.h"
+#include <sstream>
+#include <fstream>
+#include <stdexcept>
+#include <cassert>
+
+using namespace zim::writer;
+
+/* Constructor: stores the index settings, derives the stemmer language through ICU and loads
+ * the stop words of `language` (when that resource exists) into the stopper. `verbose` is unused. */
+XapianIndexer::XapianIndexer(const std::string& indexPath, const std::string& language, IndexingMode indexingMode, const bool verbose)
+    : indexPath(indexPath),
+      language(language),
+      indexingMode(indexingMode)
+{
+  /* Build an ICU Locale object to retrieve the ISO-639 language code (from ISO-639-3). */
+  stemmer_language = icu::Locale(language.c_str()).getLanguage();
+
+  /* Read the stopwords, one per line; when the resource is not found the
+     stopwords text stays empty and nothing is added to the stopper. */
+  try {
+    stopwords = getResource("stopwords/" + language);
+  } catch(ResourceNotFound&) {
+  }
+  std::istringstream stopwordStream(stopwords);
+  for (std::string word; std::getline(stopwordStream, word, '\n'); ) {
+    stopper.add(word);
+  }
+}
+
+// Best-effort cleanup of the on-disk index: removes the temporary database (indexPath + ".tmp")
+// and the index at indexPath. Each removal is guarded on its own so that a failure on one path
+// does not prevent the attempt on the other. A destructor must not throw: errors are ignored.
+XapianIndexer::~XapianIndexer()
+{
+  if (indexPath.empty()) {
+    return;
+  }
+#ifndef _WIN32
+//[TODO] Implement remove for windows
+  try { zim::DEFAULTFS::remove(indexPath + ".tmp"); } catch (...) { /* Do not raise */ }
+  try { zim::DEFAULTFS::remove(indexPath); } catch (...) { /* Do not raise */ }
+#endif
+}
+
+/*
+ * `valuesmap` is a metadata associated with the Xapian database. We are using it
+ * to attach slot numbers of each document in the index to the value they are storing.
+ * These values and slot numbers are used in collapsing, filtering etc.
+ *
+ * Title index:
+ * Slot 0: Title of the article. Used in collapsing articles with same name.
+ * Slot 1: path/redirectPath of the article. Used in collapsing duplicates(redirects).
+ *
+ * Fulltext Index:
+ * Slot 0: Title of the article. Used in collapsing articles with same name.
+ * Slot 1: Word count of the article.
+ * Slot 2: Geo position of the article. Used for geo-filtering.
+ *
+ * The `kind` metadata indicates whether the database is a title or a fulltext index.
+ *
+ * The `data` metadata indicates the type of data stored in the index. A value of "fullPath"
+ * means the data stores the complete path with a namespace.
+ */
+
+// Opens (creating or overwriting) the temporary database at indexPath + ".tmp" and stores the
+// index metadata: valuesmap/kind/data depending on the indexing mode, then language and stopwords.
+void XapianIndexer::indexingPrelude()
+{
+  const auto dbFlags = Xapian::DB_CREATE_OR_OVERWRITE | Xapian::DB_NO_TERMLIST;
+  writableDatabase = Xapian::WritableDatabase(indexPath + ".tmp", dbFlags);
+
+  // Only the two known modes write mode-specific metadata.
+  const bool titleMode = (indexingMode == IndexingMode::TITLE);
+  if (titleMode || indexingMode == IndexingMode::FULL) {
+    writableDatabase.set_metadata("valuesmap", titleMode ? "title:0;targetPath:1"
+                                                         : "title:0;wordcount:1;geo.position:2");
+    writableDatabase.set_metadata("kind", titleMode ? "title" : "fulltext");
+    writableDatabase.set_metadata("data", "fullPath");
+  }
+
+  writableDatabase.set_metadata("language", language);
+  writableDatabase.set_metadata("stopwords", stopwords);
+}
+
+/*
+ * For title index, index the full path with namespace as data of the document.
+ * The targetPath in valuesmap will store the path without namespace.
+ * TODO:
+ * Currently for title index we are storing path twice (redirectPath/path in
+ * valuesmap and path in index data). In the future, we want to keep only one of
+ * these(index data if possible) to reduce index size while supporting the
+ * collapse on path feature.
+ */
+
+// Adds one document to the title index (TITLE mode only). The document data is "C/" + path,
+// value slot 0 holds the title and slot 1 the target path (or `path` when targetPath is
+// empty); the accent-stripped title, prefixed with ANCHOR_TERM, is what gets indexed as text.
+void XapianIndexer::indexTitle(const std::string& path, const std::string& title, const std::string& targetPath)
+{
+  assert(indexingMode == IndexingMode::TITLE);
+  Xapian::Stem stemmer;
+  Xapian::TermGenerator termGenerator;
+  try {
+    stemmer = Xapian::Stem(stemmer_language);
+    termGenerator.set_stemmer(stemmer);
+    termGenerator.set_stemming_strategy(Xapian::TermGenerator::STEM_SOME);
+  } catch (...) {
+    // Stemming could not be set up for stemmer_language: carry on without this setup.
+  }
+
+  Xapian::Document doc;
+  doc.clear_values();
+  doc.set_data("C/" + path);
+  termGenerator.set_document(doc);
+
+  const std::string plainTitle = zim::removeAccents(title);
+
+  // Value slots, as announced by the "valuesmap" metadata: 0 = title, 1 = target path.
+  doc.add_value(0, title);
+  doc.add_value(1, targetPath.empty() ? path : targetPath);
+
+  if (!plainTitle.empty()) {
+    termGenerator.index_text(ANCHOR_TERM + plainTitle, 1);
+  }
+
+  /* add to the database */
+  writableDatabase.add_document(doc);
+  empty = false;
+}
+
+// Commits the temporary database, compacts it into a single file at indexPath, then closes it.
+void XapianIndexer::indexingPostlude() {
+  writableDatabase.commit();
+  writableDatabase.compact(indexPath, Xapian::DBCOMPACT_SINGLE_FILE | Xapian::Compactor::FULLER);
+  writableDatabase.close();
+}
+
diff --git a/src/writer/xapianIndexer.h b/src/writer/xapianIndexer.h
new file mode 100644
index 0000000..ffed3a7
--- /dev/null
+++ b/src/writer/xapianIndexer.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ * Copyright (C) 2011 Emmanuel Engelhart <kelson@kiwix.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef LIBZIM_WRITER_XAPIANINDEXER_H
+#define LIBZIM_WRITER_XAPIANINDEXER_H
+
+#include <zim/writer/item.h>
+
+#include <unicode/locid.h>
+#include <xapian.h>
+#include <zim/blob.h>
+
+
+namespace zim {
+namespace writer {
+
+class IndexTask;
+
+enum class IndexingMode {  // Kind of index a XapianIndexer builds.
+  TITLE,  // title index
+  FULL  // fulltext index
+};
+
+class XapianIndexer  // Builds one xapian index (title or fulltext) on disk at indexPath.
+{
+ public:
+  XapianIndexer(const std::string& indexPath, const std::string& language, IndexingMode mode, bool verbose);
+  virtual ~XapianIndexer();  // removes the index files (not implemented on Windows)
+  std::string getIndexPath() const { return indexPath; }  // read-only accessor, hence const
+  void indexingPrelude();  // opens the temporary database and writes its metadata
+  void indexingPostlude();  // commits, compacts into indexPath and closes the database
+  bool is_empty() const { return empty; }  // true until a first document has been added
+
+  void indexTitle(const std::string& path, const std::string& title, const std::string& targetPath = "");  // TITLE mode only
+
+ protected:
+  Xapian::WritableDatabase writableDatabase;  // temporary database being filled
+  bool empty {true};  // cleared when a document is added
+  std::string stemmer_language;  // language code derived from `language` through ICU
+  Xapian::SimpleStopper stopper;  // filled with the stop words of `language`
+  std::string indexPath;  // final index file; the temporary database is indexPath + ".tmp"
+  std::string language;  // stored as "language" metadata
+  std::string stopwords;  // raw stop word list, stored as "stopwords" metadata
+  IndexingMode indexingMode;  // selects the metadata written by indexingPrelude()
+
+ friend class zim::writer::IndexTask;  // IndexTask::run() accesses the database, stopper and `empty`
+};
+
+}
+}
+
+#endif  // LIBZIM_WRITER_XAPIANINDEXER_H
diff --git a/src/writer/xapianWorker.cpp b/src/writer/xapianWorker.cpp
new file mode 100644
index 0000000..3f23027
--- /dev/null
+++ b/src/writer/xapianWorker.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "xapianWorker.h"
+#include "creatordata.h"
+
+#include "xapianIndexer.h"
+
+#include <stdexcept>
+#include <sstream>
+#include <mutex>
+
+static std::mutex s_dbaccessLock;  // held by IndexTask::run() while it adds a document to the database
+std::atomic<unsigned long> zim::writer::IndexTask::waiting_task(0);  // incremented by the IndexTask constructor, decremented by its destructor
+
+namespace zim
+{
+  namespace writer
+  {
+
+    const unsigned int keywordsBoostFactor = 3;  // weight passed when indexing keywords
+    // Weight passed when indexing a title: 1, plus 1 for every complete 500 units of contentLength.
+    inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
+      return 1 + contentLength / 500;
+    }
+
+    // Polls until waiting_task is zero; microsleep() gets 0, 10, 20, ... and is called at least once.
+    void IndexTask::waitNoMoreTask() {
+      for (unsigned int backoff = 0; ; backoff += 10) {
+        microsleep(backoff);
+        if (waiting_task.load() == 0) break;
+      }
+    }
+
+    // Builds the fulltext document of one item and adds it to the indexer's database. Document
+    // data is "C/" + path; value slots: 0 = title, 1 = word count, 2 = serialised geo position
+    // (only when the item provides one). Content, title and keywords are indexed as text.
+    void IndexTask::run(CreatorData* data) {
+      if (!mp_indexData->hasIndexData()) {
+        return;  // nothing to index for this item
+      }
+      Xapian::Stem stemmer;
+      Xapian::TermGenerator termGenerator;
+      try {
+        stemmer = Xapian::Stem(mp_indexer->stemmer_language);
+        termGenerator.set_stemmer(stemmer);
+        termGenerator.set_stemming_strategy(Xapian::TermGenerator::STEM_ALL);
+      } catch (...) {
+        // No stemming for language.
+      }
+      termGenerator.set_stopper(&mp_indexer->stopper);
+      termGenerator.set_stopper_strategy(Xapian::TermGenerator::STOP_ALL);
+
+      Xapian::Document doc;
+      termGenerator.set_document(doc);
+      doc.set_data("C/" + m_path);
+      doc.add_value(0, mp_indexData->getTitle());
+
+      std::stringstream wordCountText;
+      wordCountText << mp_indexData->getWordCount();
+      doc.add_value(1, wordCountText.str());
+
+      const auto geo = mp_indexData->getGeoPosition();
+      if (std::get<0>(geo)) {
+        const Xapian::LatLongCoord coord(std::get<1>(geo), std::get<2>(geo));
+        doc.add_value(2, coord.serialise());
+      }
+
+      /* Index the content */
+      const auto content = mp_indexData->getContent();
+      if (!content.empty()) {
+        termGenerator.index_text_without_positions(content);
+      }
+
+      /* Index the title, weighted according to the content size */
+      const auto title = mp_indexData->getTitle();
+      if (!title.empty()) {
+        termGenerator.index_text_without_positions(title, getTitleBoostFactor(content.size()));
+      }
+
+      /* Index the keywords */
+      const auto keywords = mp_indexData->getKeywords();
+      if (!keywords.empty()) {
+        termGenerator.index_text_without_positions(keywords, keywordsBoostFactor);
+      }
+
+      // The database write and the `empty` flag update happen under s_dbaccessLock.
+      const std::lock_guard<std::mutex> dbGuard(s_dbaccessLock);
+      mp_indexer->writableDatabase.add_document(doc);
+      mp_indexer->empty = false;
+    }
+  }
+}
diff --git a/src/writer/xapianWorker.h b/src/writer/xapianWorker.h
new file mode 100644
index 0000000..4d6ad96
--- /dev/null
+++ b/src/writer/xapianWorker.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2020-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU  General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifndef OPENZIM_LIBZIM_XAPIAN_WORKER_H
+#define OPENZIM_LIBZIM_XAPIAN_WORKER_H
+
+#include <atomic>
+#include <memory>
+#include "workers.h"
+#include <zim/writer/item.h>
+
+namespace zim {
+namespace writer {
+
+class Item;
+class XapianIndexer;
+
+class IndexTask : public Task {  // Task that indexes the IndexData of one item with a XapianIndexer.
+  public:
+    IndexTask(const IndexTask&) = delete;  // not copyable: every instance is counted in waiting_task
+    IndexTask& operator=(const IndexTask&) = delete;
+    IndexTask(std::shared_ptr<IndexData> indexData, const std::string& path, XapianIndexer* indexer) :
+      mp_indexData(indexData),
+      m_path(path),
+      mp_indexer(indexer)
+    {
+      ++waiting_task;  // one more task alive
+    }
+    ~IndexTask() override
+    {
+      --waiting_task;  // lets waitNoMoreTask() observe the end of this task
+    }
+
+    static void waitNoMoreTask();  // returns once waiting_task is zero
+
+    void run(CreatorData* data) override;  // `override` makes the compiler check the match with Task::run
+    static std::atomic<unsigned long> waiting_task;  // number of IndexTask objects currently alive
+
+  private:
+    std::shared_ptr<IndexData> mp_indexData;  // data to index
+    std::string m_path;  // path of the item; stored in the document as "C/" + path
+    XapianIndexer* mp_indexer;  // not owned: never deleted by the task
+};
+
+}
+}
+
+#endif // OPENZIM_LIBZIM_XAPIAN_WORKER_H
diff --git a/src/xapian/htmlparse.cc b/src/xapian/htmlparse.cc
new file mode 100644
index 0000000..447023f
--- /dev/null
+++ b/src/xapian/htmlparse.cc
@@ -0,0 +1,376 @@
+/* htmlparse.cc: simple HTML parser for omega indexer
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2001 Ananova Ltd
+ * Copyright 2002,2006,2007,2008 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+// #include <config.h>
+
+#include "htmlparse.h"
+
+#include <xapian.h>
+
+// #include "utf8convert.h"
+
+#include <algorithm>
+#include <mutex>
+
+#include <ctype.h>
+#include <cstring>
+#include <stdio.h>
+#include <stdlib.h>
+
+using namespace std;
+
+// Lower-cases str in place, one char at a time, with tolower() (each char goes through unsigned char first).
+inline void
+lowercase_string(string &str)
+{
+    for (char &ch : str)
+	ch = tolower(static_cast<unsigned char>(ch));
+}
+
+map<string, unsigned int> zim::HtmlParser::named_ents;  // entity name -> value table shared by all parsers; filled by the HtmlParser constructor
+static std::mutex sInitLock;  // held by the HtmlParser constructor while it checks and fills named_ents
+
+// True when c is not a decimal digit (c goes through unsigned char before reaching isdigit()).
+inline static bool p_notdigit(char c)
+{
+    return isdigit(static_cast<unsigned char>(c)) == 0;
+}
+
+// True when c is not a hexadecimal digit (c goes through unsigned char before reaching isxdigit()).
+inline static bool p_notxdigit(char c)
+{
+    return isxdigit(static_cast<unsigned char>(c)) == 0;
+}
+
+// True when c is neither a letter nor a digit (c goes through unsigned char before reaching isalnum()).
+inline static bool p_notalnum(char c)
+{
+    return isalnum(static_cast<unsigned char>(c)) == 0;
+}
+
+// True when c is not whitespace (c goes through unsigned char before reaching isspace()).
+inline static bool p_notwhitespace(char c)
+{
+    return isspace(static_cast<unsigned char>(c)) == 0;
+}
+
+// True when c is neither alphanumeric nor one of '.', '-', ':'.
+inline static bool p_nottag(char c)
+{
+    if (c == '.' || c == '-' || c == ':') return false; // ':' for XML namespaces.
+    return isalnum(static_cast<unsigned char>(c)) == 0;
+}
+
+// True when c is whitespace or '>'.
+inline static bool p_whitespacegt(char c)
+{
+    return c == '>' || isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+// True when c is whitespace, '=' or '>'.
+inline static bool p_whitespaceeqgt(char c)
+{
+    return c == '=' || c == '>' || isspace(static_cast<unsigned char>(c)) != 0;
+}
+
+// Looks param up in `parameters`: on a hit copies its value into `value` and returns true; else returns false.
+bool
+zim::HtmlParser::get_parameter(const string & param, string & value)
+{
+    const auto hit = parameters.find(param);
+    if (hit != parameters.end()) { value = hit->second; return true; }
+    return false;
+}
+
+// Fills the shared named_ents table from namedentities.h unless it already has entries;
+// sInitLock is held while the table is checked and filled.
+zim::HtmlParser::HtmlParser()
+{
+    static const struct ent { const char *n; unsigned int v; } ents[] = {
+#include "namedentities.h"
+	{ NULL, 0 }
+    };
+    std::lock_guard<std::mutex> initGuard(sInitLock);
+    if (!named_ents.empty()) return;	// table already filled
+    // The array ends with a sentinel entry whose name is NULL.
+    for (const struct ent *entry = ents; entry->n; ++entry) {
+	named_ents[string(entry->n)] = entry->v;
+    }
+}
+
+// Decodes, in place, the character references found in s: numeric ones (&#65; &#x41;) and
+// named ones (&amp;) are replaced by their character (UTF-8 encoded); the ';' is optional.
+void
+zim::HtmlParser::decode_entities(string &s)
+{
+    // s_end is kept as a const_iterator so that find() and find_if() get one iterator type.
+    string::const_iterator amp = s.begin(), s_end = s.end();
+    while ((amp = find(amp, s_end, '&')) != s_end) {
+	unsigned int val = 0;
+	string::const_iterator end, p = amp + 1;
+	if (p != s_end && *p == '#') {
+	    p++;
+	    if (p != s_end && (*p == 'x' || *p == 'X')) {
+		// hex
+		p++;
+		end = find_if(p, s_end, p_notxdigit);
+		sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val);
+	    } else { // decimal number
+		end = find_if(p, s_end, p_notdigit);
+		val = atoi(s.substr(p - s.begin(), end - p).c_str());
+	    }
+	} else {
+	    end = find_if(p, s_end, p_notalnum);
+	    string code = s.substr(p - s.begin(), end - p);
+	    map<string, unsigned int>::const_iterator i = named_ents.find(code);
+	    if (i != named_ents.end()) val = i->second;
+	}
+	if (end < s_end && *end == ';') end++;
+	if (val) {
+	    string::size_type amp_pos = amp - s.begin();
+	    unsigned len = 1;	// number of bytes written in place of the reference
+	    if (val < 0x80) {
+		s.replace(amp_pos, end - amp, 1u, char(val));
+	    } else {
+		// Convert unicode value val to UTF-8.
+		char seq[4];
+		len = Xapian::Unicode::nonascii_to_utf8(val, seq);
+		s.replace(amp_pos, end - amp, seq, len);
+	    }
+	    s_end = s.end();
+	    // The string was modified, so the iterators are no longer valid. Resume right after
+	    // the inserted bytes: if len is 0, `amp_pos + 1` could run past the end of s.
+	    amp = s.begin() + amp_pos + len;
+	} else {
+	    amp = end;
+	}
+    }
+}
+
+void
+zim::HtmlParser::parse_html(const string &body)
+{
+    // Tokenize body, calling process_text(), opening_tag() and closing_tag()
+    // as text runs and tags are found; in_script and parameters start fresh.
+    in_script = false;
+    parameters.clear();
+    string::const_iterator start = body.begin();
+    while (true) {
+	// Skip through until we find an HTML tag, a comment, or the end of
+	// document.  Ignore isolated occurrences of `<' which don't start
+	// a tag or comment.
+	string::const_iterator p = start;
+	while (true) {
+	    p = find(p, body.end(), '<');
+	    if (p == body.end()) break;
+	    // A '<' that is the last byte has no successor; treat it as NUL.
+	    unsigned char ch = (p + 1 != body.end()) ? *(p + 1) : '\0';
+	    // Tag, closing tag, or comment (or SGML declaration).
+	    if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break;
+
+	    if (ch == '?') {
+		// PHP code or XML declaration.
+		// XML declaration is only valid at the start of the first line.
+		// FIXME: need to deal with BOMs...
+		if (p != body.begin() || body.size() < 20) break;
+
+		// XML declaration looks something like this:
+		// <?xml version="1.0" encoding="UTF-8"?>
+		if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break;
+		if (strchr(" \t\r\n", p[5]) == NULL) break;
+
+		string::const_iterator decl_end = find(p + 6, body.end(), '?');
+		if (decl_end == body.end()) break;
+
+		// Default charset for XML is UTF-8.
+		charset = "UTF-8";
+
+		string decl(p + 6, decl_end);
+		size_t enc = decl.find("encoding");
+		if (enc == string::npos) break;
+
+		enc = decl.find_first_not_of(" \t\r\n", enc + 8);
+		if (enc == string::npos || enc == decl.size()) break;
+
+		if (decl[enc] != '=') break;
+
+		enc = decl.find_first_not_of(" \t\r\n", enc + 1);
+		if (enc == string::npos || enc == decl.size()) break;
+
+		if (decl[enc] != '"' && decl[enc] != '\'') break;
+
+		char quote = decl[enc++];
+		size_t enc_end = decl.find(quote, enc);
+		// Only accept a value whose closing quote was actually found.
+		if (enc_end != string::npos)
+		    charset = decl.substr(enc, enc_end - enc);
+
+		break;
+	    }
+	    p++;
+	}
+
+	// Process text up to start of tag.
+	if (p > start) {
+	    string text = body.substr(start - body.begin(), p - start);
+	    // convert_to_utf8(text, charset);
+	    decode_entities(text);
+	    process_text(text);
+	}
+
+	if (p == body.end()) break;
+
+	start = p + 1;
+
+	if (start == body.end()) break;
+
+	if (*start == '!') {
+	    if (++start == body.end()) break;
+	    if (++start == body.end()) break;
+	    // comment or SGML declaration
+	    if (*(start - 1) == '-' && *start == '-') {
+		++start;
+		string::const_iterator close = find(start, body.end(), '>');
+		// An unterminated comment swallows rest of document
+		// (like Netscape, but unlike MSIE IIRC)
+		if (close == body.end()) break;
+
+		p = close;
+		// look for -->
+		while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-'))
+		    p = find(p + 1, body.end(), '>');
+
+		if (p != body.end()) {
+		    // Check for htdig's "ignore this bit" comments.
+		    if (p - start == 15 && string(start, p - 2) == "htdig_noindex") {
+			string::size_type i;
+			i = body.find("<!--/htdig_noindex-->", p + 1 - body.begin());
+			if (i == string::npos) break;
+			start = body.begin() + i + 21;
+			continue;
+		    }
+		    // If we found --> skip to there.
+		    start = p;
+		} else {
+		    // Otherwise skip to the first > we found (as Netscape does).
+		    start = close;
+		}
+	    } else {
+		// just an SGML declaration, perhaps giving the DTD - ignore it
+		start = find(start - 1, body.end(), '>');
+		if (start == body.end()) break;
+	    }
+	    ++start;
+	} else if (*start == '?') {
+	    if (++start == body.end()) break;
+	    // PHP - swallow until ?> or EOF
+	    start = find(start + 1, body.end(), '>');
+
+	    // look for ?>
+	    while (start != body.end() && *(start - 1) != '?')
+		start = find(start + 1, body.end(), '>');
+
+	    // unterminated PHP swallows rest of document (rather arbitrarily
+	    // but it avoids polluting the database when things go wrong)
+	    if (start != body.end()) ++start;
+	} else {
+	    // opening or closing tag
+	    int closing = 0;
+
+	    if (*start == '/') {
+		closing = 1;
+		start = find_if(start + 1, body.end(), p_notwhitespace);
+	    }
+
+	    p = start;
+	    start = find_if(start, body.end(), p_nottag);
+	    string tag = body.substr(p - body.begin(), start - p);
+	    // convert tagname to lowercase
+	    lowercase_string(tag);
+
+	    if (closing) {
+		closing_tag(tag);
+		if (in_script && tag == "script") in_script = false;
+
+		/* ignore any bogus parameters on closing tags */
+		p = find(start, body.end(), '>');
+		if (p == body.end()) break;
+		start = p + 1;
+	    } else {
+		// FIXME: parse parameters lazily.
+		while (start < body.end() && *start != '>') {
+		    string name, value;
+
+		    p = find_if(start, body.end(), p_whitespaceeqgt);
+
+		    name.assign(body, start - body.begin(), p - start);
+
+		    p = find_if(p, body.end(), p_notwhitespace);
+
+		    start = p;
+		    if (start != body.end() && *start == '=') {
+			start = find_if(start + 1, body.end(), p_notwhitespace);
+
+			p = body.end();
+			// Input may end right after '='; then there is no quote.
+			int quote = (start != body.end()) ? *start : 0;
+			if (quote == '"' || quote == '\'') {
+			    start++;
+			    p = find(start, body.end(), quote);
+			}
+
+			if (p == body.end()) {
+			    // unquoted or no closing quote
+			    p = find_if(start, body.end(), p_whitespacegt);
+			}
+			value.assign(body, start - body.begin(), p - start);
+			start = find_if(p, body.end(), p_notwhitespace);
+
+			if (!name.empty()) {
+			    // convert parameter name to lowercase
+			    lowercase_string(name);
+			    // in case of multiple entries, use the first
+			    // (as Netscape does)
+			    parameters.insert(make_pair(name, value));
+			}
+		    }
+		}
+#if 0
+		cout << "<" << tag;
+		map<string, string>::const_iterator x;
+		for (x = parameters.begin(); x != parameters.end(); x++) {
+		    cout << " " << x->first << "=\"" << x->second << "\"";
+		}
+		cout << ">\n";
+#endif
+		opening_tag(tag);
+		parameters.clear();
+
+		// In <script> tags we ignore opening tags to avoid problems
+		// with "a<b".
+		if (tag == "script") in_script = true;
+
+		if (start != body.end() && *start == '>') ++start;
+	    }
+	}
+    }
+}
diff --git a/src/xapian/htmlparse.h b/src/xapian/htmlparse.h
new file mode 100644
index 0000000..a884b2a
--- /dev/null
+++ b/src/xapian/htmlparse.h
@@ -0,0 +1,53 @@
+/* htmlparse.h: simple HTML parser for omega indexer
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2002,2006,2008 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+#ifndef OMEGA_INCLUDED_HTMLPARSE_H
+#define OMEGA_INCLUDED_HTMLPARSE_H
+
+#include <string>
+#include <map>
+
+using std::string;
+using std::map;
+
+namespace zim {
+
+class HtmlParser {
+	map<string, string> parameters;	// attributes of the tag being opened
+    protected:
+	void decode_entities(string &s);	// rewrites entities in s in place
+	bool in_script;	// true between <script> and </script>
+	string charset;	// set by parse_html() from an XML declaration
+	static map<string, unsigned int> named_ents;
+	// NOTE(review): presumably looks up an attribute of the current tag.
+	bool get_parameter(const string & param, string & value);
+    public:
+	virtual void process_text(const string &/*text*/) { }	// no-op hook
+	virtual void opening_tag(const string &/*tag*/) { }	// no-op hook
+	virtual void closing_tag(const string &/*tag*/) { }	// no-op hook
+	virtual void parse_html(const string &text);	// drives the hooks above
+	HtmlParser();
+	virtual ~HtmlParser() { }
+};
+
+};
+
+#endif // OMEGA_INCLUDED_HTMLPARSE_H
diff --git a/src/xapian/myhtmlparse.cc b/src/xapian/myhtmlparse.cc
new file mode 100644
index 0000000..d036645
--- /dev/null
+++ b/src/xapian/myhtmlparse.cc
@@ -0,0 +1,347 @@
+/* myhtmlparse.cc: subclass of HtmlParser for extracting text.
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2002,2003,2004,2006,2007,2008 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+// #include <config.h>
+
+#include "myhtmlparse.h"
+
+// #include "utf8convert.h"
+
+#include <ctype.h>
+#include <sstream>
+#include <string.h>
+
+inline void lowercase_string(string &str) {
+  // Lowercase in place; the unsigned char cast keeps tolower() well-defined.
+  for (char &ch : str)
+    ch = tolower(static_cast<unsigned char>(ch));
+}
+
+void zim::MyHtmlParser::parse_html(const string &text, const string &charset_,
+                                   bool charset_from_meta_) {
+  charset_from_meta = charset_from_meta_;  // did charset_ come from a <meta>?
+  charset = charset_;                      // charset to assume for this pass
+  HtmlParser::parse_html(text);            // base-class scan fires our hooks
+}
+
+void zim::MyHtmlParser::process_text(const string &text) {
+  // Nothing to collect from empty runs or from <script>/<style> content.
+  if (text.empty() || in_script_tag || in_style_tag)
+    return;
+  // Leading whitespace (including an all-whitespace run) is a word break.
+  string::size_type word = text.find_first_not_of(WHITESPACE);
+  if (word != 0)
+    pending_space = true;
+  while (word != string::npos) {
+    if (pending_space && !dump.empty())
+      dump += ' ';
+    const string::size_type stop = text.find_first_of(WHITESPACE, word);
+    pending_space = (stop != string::npos);  // false => this is the last word
+    dump.append(text, word, pending_space ? stop - word : string::npos);
+    if (!pending_space)
+      return;
+    word = text.find_first_not_of(WHITESPACE, stop + 1);
+  }
+}
+
+inline float _stof(std::string str) {  // leading float of str; throws if none
+  std::istringstream stream(str);
+  float ret;
+  if (!(stream >> ret)) throw std::ios_base::failure("_stof: not a number");
+  return ret;
+}
+
+void zim::MyHtmlParser::opening_tag(const string &tag) {  // hook for <tag ...>
+  if (tag.empty())
+    return;
+  switch (tag[0]) {  // dispatch on the first letter, then match full names
+  case 'a':
+    if (tag == "address")
+      pending_space = true;  // next text run is preceded by a space
+    break;
+  case 'b':
+    if (tag == "body") {
+      dump.resize(0);  // discard any text collected before <body>
+      break;
+    }
+    if (tag == "blockquote" || tag == "br")
+      pending_space = true;
+    break;
+  case 'c':
+    if (tag == "center")
+      pending_space = true;
+    break;
+  case 'd':
+    if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
+        tag == "dt")
+      pending_space = true;
+    break;
+  case 'e':
+    if (tag == "embed")
+      pending_space = true;
+    break;
+  case 'f':
+    if (tag == "fieldset" || tag == "form")
+      pending_space = true;
+    break;
+  case 'h':
+    // hr, and h1, ..., h6
+    if (tag.length() == 2 && strchr("r123456", tag[1]))
+      pending_space = true;
+    break;
+  case 'i':
+    if (tag == "iframe" || tag == "img" || tag == "isindex" || tag == "input")
+      pending_space = true;
+    break;
+  case 'k':
+    if (tag == "keygen")
+      pending_space = true;
+    break;
+  case 'l':
+    if (tag == "legend" || tag == "li" || tag == "listing")
+      pending_space = true;
+    break;
+  case 'm':
+    if (tag == "meta") {
+      string content;
+      if (get_parameter("content", content)) {
+        string name;
+        if (get_parameter("name", name)) {  // <meta name=... content=...>
+          lowercase_string(name);
+          if (name == "description") {
+            if (sample.empty()) {  // only the first description is kept
+              swap(sample, content);
+              // convert_to_utf8(sample, charset);
+              decode_entities(sample);
+            }
+          } else if (name == "keywords") {
+            if (!keywords.empty())
+              keywords += ' ';  // separate values of successive keyword tags
+            // convert_to_utf8(content, charset);
+            decode_entities(content);
+            keywords += content;
+          } else if (name == "robots") {
+            decode_entities(content);
+            lowercase_string(content);
+            if (content.find("none") != string::npos ||
+                content.find("noindex") != string::npos) {
+              indexing_allowed = false;
+              throw true;  // leaves parse_html(); the caller must catch this
+            }
+          } else if (name == "geo.position") {
+            auto sep_pos = content.find(";");  // content is "<lat>;<lon>"
+            if (sep_pos != string::npos) {
+              try {
+                latitude = _stof(content.substr(0, sep_pos));
+                longitude = _stof(content.substr(sep_pos + 1));
+                has_geoPosition = true;
+              } catch (...) {
+                // invalid value in content, just pass and continue.
+              }
+            }
+          }
+          break;
+        }
+        // If the current charset came from a meta tag, don't
+        // force reparsing again!
+        if (charset_from_meta)
+          break;
+        string hdr;
+        if (get_parameter("http-equiv", hdr)) {
+          lowercase_string(hdr);
+          if (hdr == "content-type") {
+            lowercase_string(content);
+            size_t start = content.find("charset=");
+            if (start == string::npos)
+              break;
+            start += 8;  // length of "charset="
+            if (start == content.size())
+              break;
+            size_t end = start;
+            if (content[start] != '"') {  // unquoted value: stop at a delimiter
+              while (end < content.size()) {
+                unsigned char ch = content[end];
+                if (ch <= 32 || ch >= 127 || strchr(";()<>@,:\\\"/[]?={}", ch))
+                  break;
+                ++end;
+              }
+            } else {  // quoted value: runs to the closing double quote
+              ++start;
+              ++end;
+              while (end < content.size()) {
+                unsigned char ch = content[end];
+                if (ch == '"')
+                  break;
+                if (ch == '\\')
+                  content.erase(end, 1);  // drop backslash, keep escaped char
+                ++end;
+              }
+            }
+            string newcharset(content, start, end - start);
+            if (charset != newcharset) {
+              throw newcharset;  // presumably caught to re-parse; confirm
+            }
+          }
+        }
+        break;
+      }
+      if (charset_from_meta)
+        break;
+      string newcharset;
+      if (get_parameter("charset", newcharset)) {
+        // HTML5 added: <meta charset="...">
+        lowercase_string(newcharset);
+        if (charset != newcharset) {
+          throw newcharset;  // presumably caught to re-parse; confirm
+        }
+      }
+      break;
+    }
+    if (tag == "marquee" || tag == "menu" || tag == "multicol")
+      pending_space = true;
+    break;
+  case 'o':
+    if (tag == "ol" || tag == "option")
+      pending_space = true;
+    break;
+  case 'p':
+    if (tag == "p" || tag == "pre" || tag == "plaintext")
+      pending_space = true;
+    break;
+  case 'q':
+    if (tag == "q")
+      pending_space = true;
+    break;
+  case 's':
+    if (tag == "style") {
+      in_style_tag = true;  // process_text() ignores text while this is set
+      break;
+    }
+    if (tag == "script") {
+      in_script_tag = true;  // process_text() ignores text while this is set
+      break;
+    }
+    if (tag == "select")
+      pending_space = true;
+    break;
+  case 't':
+    if (tag == "table" || tag == "td" || tag == "textarea" || tag == "th")
+      pending_space = true;
+    break;
+  case 'u':
+    if (tag == "ul")
+      pending_space = true;
+    break;
+  case 'x':
+    if (tag == "xmp")
+      pending_space = true;
+    break;
+  }
+}
+
+void zim::MyHtmlParser::closing_tag(const string &tag) {  // hook for </tag>
+  if (tag.empty())
+    return;
+  switch (tag[0]) {  // dispatch on the first letter, then match full names
+  case 'a':
+    if (tag == "address")
+      pending_space = true;  // next text run is preceded by a space
+    break;
+  case 'b':
+    if (tag == "body") {
+      throw true;  // leaves parse_html() at </body>; the caller must catch
+    }
+    if (tag == "blockquote" || tag == "br")
+      pending_space = true;
+    break;
+  case 'c':
+    if (tag == "center")
+      pending_space = true;
+    break;
+  case 'd':
+    if (tag == "dd" || tag == "dir" || tag == "div" || tag == "dl" ||
+        tag == "dt")
+      pending_space = true;
+    break;
+  case 'f':
+    if (tag == "fieldset" || tag == "form")
+      pending_space = true;
+    break;
+  case 'h':
+    // hr, and h1, ..., h6
+    if (tag.length() == 2 && strchr("r123456", tag[1]))
+      pending_space = true;
+    break;
+  case 'i':
+    if (tag == "iframe")
+      pending_space = true;
+    break;
+  case 'l':
+    if (tag == "legend" || tag == "li" || tag == "listing")
+      pending_space = true;
+    break;
+  case 'm':
+    if (tag == "marquee" || tag == "menu")
+      pending_space = true;
+    break;
+  case 'o':
+    if (tag == "ol" || tag == "option")
+      pending_space = true;
+    break;
+  case 'p':
+    if (tag == "p" || tag == "pre")
+      pending_space = true;
+    break;
+  case 'q':
+    if (tag == "q")
+      pending_space = true;
+    break;
+  case 's':
+    if (tag == "style") {
+      in_style_tag = false;  // process_text() collects text again
+      break;
+    }
+    if (tag == "script") {
+      in_script_tag = false;  // process_text() collects text again
+      break;
+    }
+    if (tag == "select")
+      pending_space = true;
+    break;
+  case 't':
+    if (tag == "title") {
+      if (title.empty())  // only the first <title> is kept
+        swap(title, dump);  // text collected so far becomes the title
+      break;
+    }
+    if (tag == "table" || tag == "td" || tag == "textarea" || tag == "th")
+      pending_space = true;
+    break;
+  case 'u':
+    if (tag == "ul")
+      pending_space = true;
+    break;
+  case 'x':
+    if (tag == "xmp")
+      pending_space = true;
+    break;
+  }
+}
diff --git a/src/xapian/myhtmlparse.h b/src/xapian/myhtmlparse.h
new file mode 100644
index 0000000..a1f2101
--- /dev/null
+++ b/src/xapian/myhtmlparse.h
@@ -0,0 +1,75 @@
+/* myhtmlparse.h: subclass of HtmlParser for extracting text
+ *
+ * Copyright 1999,2000,2001 BrightStation PLC
+ * Copyright 2002,2003,2004,2006,2008 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+
+#ifndef OMEGA_INCLUDED_MYHTMLPARSE_H
+#define OMEGA_INCLUDED_MYHTMLPARSE_H
+
+#include "htmlparse.h"
+
+// FIXME: Should we include \xa0 which is non-breaking space in iso-8859-1, but
+// not in all charsets and perhaps spans of all \xa0 should become a single
+// \xa0?
+#define WHITESPACE " \t\n\r"
+
+namespace zim {
+
+class MyHtmlParser : public HtmlParser {
+    public:
+	bool in_script_tag;	// inside <script>: text is not collected
+	bool in_style_tag;	// inside <style>: text is not collected
+	bool pending_space;	// emit a space before the next collected word
+	bool indexing_allowed;	// cleared by a robots meta of none/noindex
+	bool charset_from_meta;	// current charset already came from a <meta>
+	float latitude, longitude;	// from <meta name="geo.position">
+	bool has_geoPosition;	// true once latitude/longitude were parsed
+	string title, sample, keywords, dump;	// harvested text fields
+	void process_text(const string &text) override;
+	void opening_tag(const string &tag) override;
+	void closing_tag(const string &tag) override;
+	using HtmlParser::parse_html;	// keep the one-argument overload visible
+	void parse_html(const string &text, const string &charset_,
+			bool charset_from_meta_);
+	MyHtmlParser() :
+		in_script_tag(false),
+		in_style_tag(false),
+		pending_space(false),
+		indexing_allowed(true),
+		charset_from_meta(false),
+		latitude(0), longitude(0), has_geoPosition(false) { }
+	// Put every field declared in this class back to its constructed state.
+	void reset() {
+	    in_script_tag = false;
+	    in_style_tag = false;
+	    pending_space = false;
+	    indexing_allowed = true;
+	    charset_from_meta = false;
+	    latitude = longitude = 0;
+	    has_geoPosition = false;
+	    title.resize(0);
+	    sample.resize(0);
+	    keywords.resize(0);
+	    dump.resize(0);
+	}
+};
+
+};
+
+#endif // OMEGA_INCLUDED_MYHTMLPARSE_H
diff --git a/src/xapian/namedentities.h b/src/xapian/namedentities.h
new file mode 100644
index 0000000..8b7f03e
--- /dev/null
+++ b/src/xapian/namedentities.h
@@ -0,0 +1,279 @@
+/* namedentities.h: named HTML entities.
+ *
+ * Copyright (C) 2006,2007 Olly Betts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#ifndef OMEGA_INCLUDED_NAMEDENTITIES_H
+#define OMEGA_INCLUDED_NAMEDENTITIES_H
+
+// Names and values from: "Character entity references in HTML 4"
+// http://www.w3.org/TR/html4/sgml/entities.html
+{ "quot", 34 },
+{ "amp", 38 },
+{ "apos", 39 }, // Not in HTML 4 list but used in OpenOffice XML.
+{ "lt", 60 },
+{ "gt", 62 },
+{ "nbsp", 160 },
+{ "iexcl", 161 },
+{ "cent", 162 },
+{ "pound", 163 },
+{ "curren", 164 },
+{ "yen", 165 },
+{ "brvbar", 166 },
+{ "sect", 167 },
+{ "uml", 168 },
+{ "copy", 169 },
+{ "ordf", 170 },
+{ "laquo", 171 },
+{ "not", 172 },
+{ "shy", 173 },
+{ "reg", 174 },
+{ "macr", 175 },
+{ "deg", 176 },
+{ "plusmn", 177 },
+{ "sup2", 178 },
+{ "sup3", 179 },
+{ "acute", 180 },
+{ "micro", 181 },
+{ "para", 182 },
+{ "middot", 183 },
+{ "cedil", 184 },
+{ "sup1", 185 },
+{ "ordm", 186 },
+{ "raquo", 187 },
+{ "frac14", 188 },
+{ "frac12", 189 },
+{ "frac34", 190 },
+{ "iquest", 191 },
+{ "Agrave", 192 },
+{ "Aacute", 193 },
+{ "Acirc", 194 },
+{ "Atilde", 195 },
+{ "Auml", 196 },
+{ "Aring", 197 },
+{ "AElig", 198 },
+{ "Ccedil", 199 },
+{ "Egrave", 200 },
+{ "Eacute", 201 },
+{ "Ecirc", 202 },
+{ "Euml", 203 },
+{ "Igrave", 204 },
+{ "Iacute", 205 },
+{ "Icirc", 206 },
+{ "Iuml", 207 },
+{ "ETH", 208 },
+{ "Ntilde", 209 },
+{ "Ograve", 210 },
+{ "Oacute", 211 },
+{ "Ocirc", 212 },
+{ "Otilde", 213 },
+{ "Ouml", 214 },
+{ "times", 215 },
+{ "Oslash", 216 },
+{ "Ugrave", 217 },
+{ "Uacute", 218 },
+{ "Ucirc", 219 },
+{ "Uuml", 220 },
+{ "Yacute", 221 },
+{ "THORN", 222 },
+{ "szlig", 223 },
+{ "agrave", 224 },
+{ "aacute", 225 },
+{ "acirc", 226 },
+{ "atilde", 227 },
+{ "auml", 228 },
+{ "aring", 229 },
+{ "aelig", 230 },
+{ "ccedil", 231 },
+{ "egrave", 232 },
+{ "eacute", 233 },
+{ "ecirc", 234 },
+{ "euml", 235 },
+{ "igrave", 236 },
+{ "iacute", 237 },
+{ "icirc", 238 },
+{ "iuml", 239 },
+{ "eth", 240 },
+{ "ntilde", 241 },
+{ "ograve", 242 },
+{ "oacute", 243 },
+{ "ocirc", 244 },
+{ "otilde", 245 },
+{ "ouml", 246 },
+{ "divide", 247 },
+{ "oslash", 248 },
+{ "ugrave", 249 },
+{ "uacute", 250 },
+{ "ucirc", 251 },
+{ "uuml", 252 },
+{ "yacute", 253 },
+{ "thorn", 254 },
+{ "yuml", 255 },
+{ "OElig", 338 },
+{ "oelig", 339 },
+{ "Scaron", 352 },
+{ "scaron", 353 },
+{ "Yuml", 376 },
+{ "fnof", 402 },
+{ "circ", 710 },
+{ "tilde", 732 },
+{ "Alpha", 913 },
+{ "Beta", 914 },
+{ "Gamma", 915 },
+{ "Delta", 916 },
+{ "Epsilon", 917 },
+{ "Zeta", 918 },
+{ "Eta", 919 },
+{ "Theta", 920 },
+{ "Iota", 921 },
+{ "Kappa", 922 },
+{ "Lambda", 923 },
+{ "Mu", 924 },
+{ "Nu", 925 },
+{ "Xi", 926 },
+{ "Omicron", 927 },
+{ "Pi", 928 },
+{ "Rho", 929 },
+{ "Sigma", 931 },
+{ "Tau", 932 },
+{ "Upsilon", 933 },
+{ "Phi", 934 },
+{ "Chi", 935 },
+{ "Psi", 936 },
+{ "Omega", 937 },
+{ "alpha", 945 },
+{ "beta", 946 },
+{ "gamma", 947 },
+{ "delta", 948 },
+{ "epsilon", 949 },
+{ "zeta", 950 },
+{ "eta", 951 },
+{ "theta", 952 },
+{ "iota", 953 },
+{ "kappa", 954 },
+{ "lambda", 955 },
+{ "mu", 956 },
+{ "nu", 957 },
+{ "xi", 958 },
+{ "omicron", 959 },
+{ "pi", 960 },
+{ "rho", 961 },
+{ "sigmaf", 962 },
+{ "sigma", 963 },
+{ "tau", 964 },
+{ "upsilon", 965 },
+{ "phi", 966 },
+{ "chi", 967 },
+{ "psi", 968 },
+{ "omega", 969 },
+{ "thetasym", 977 },
+{ "upsih", 978 },
+{ "piv", 982 },
+{ "ensp", 8194 },
+{ "emsp", 8195 },
+{ "thinsp", 8201 },
+{ "zwnj", 8204 },
+{ "zwj", 8205 },
+{ "lrm", 8206 },
+{ "rlm", 8207 },
+{ "ndash", 8211 },
+{ "mdash", 8212 },
+{ "lsquo", 8216 },
+{ "rsquo", 8217 },
+{ "sbquo", 8218 },
+{ "ldquo", 8220 },
+{ "rdquo", 8221 },
+{ "bdquo", 8222 },
+{ "dagger", 8224 },
+{ "Dagger", 8225 },
+{ "bull", 8226 },
+{ "hellip", 8230 },
+{ "permil", 8240 },
+{ "prime", 8242 },
+{ "Prime", 8243 },
+{ "lsaquo", 8249 },
+{ "rsaquo", 8250 },
+{ "oline", 8254 },
+{ "frasl", 8260 },
+{ "euro", 8364 },
+{ "image", 8465 },
+{ "weierp", 8472 },
+{ "real", 8476 },
+{ "trade", 8482 },
+{ "alefsym", 8501 },
+{ "larr", 8592 },
+{ "uarr", 8593 },
+{ "rarr", 8594 },
+{ "darr", 8595 },
+{ "harr", 8596 },
+{ "crarr", 8629 },
+{ "lArr", 8656 },
+{ "uArr", 8657 },
+{ "rArr", 8658 },
+{ "dArr", 8659 },
+{ "hArr", 8660 },
+{ "forall", 8704 },
+{ "part", 8706 },
+{ "exist", 8707 },
+{ "empty", 8709 },
+{ "nabla", 8711 },
+{ "isin", 8712 },
+{ "notin", 8713 },
+{ "ni", 8715 },
+{ "prod", 8719 },
+{ "sum", 8721 },
+{ "minus", 8722 },
+{ "lowast", 8727 },
+{ "radic", 8730 },
+{ "prop", 8733 },
+{ "infin", 8734 },
+{ "ang", 8736 },
+{ "and", 8743 },
+{ "or", 8744 },
+{ "cap", 8745 },
+{ "cup", 8746 },
+{ "int", 8747 },
+{ "there4", 8756 },
+{ "sim", 8764 },
+{ "cong", 8773 },
+{ "asymp", 8776 },
+{ "ne", 8800 },
+{ "equiv", 8801 },
+{ "le", 8804 },
+{ "ge", 8805 },
+{ "sub", 8834 },
+{ "sup", 8835 },
+{ "nsub", 8836 },
+{ "sube", 8838 },
+{ "supe", 8839 },
+{ "oplus", 8853 },
+{ "otimes", 8855 },
+{ "perp", 8869 },
+{ "sdot", 8901 },
+{ "lceil", 8968 },
+{ "rceil", 8969 },
+{ "lfloor", 8970 },
+{ "rfloor", 8971 },
+{ "lang", 9001 },
+{ "rang", 9002 },
+{ "loz", 9674 },
+{ "spades", 9824 },
+{ "clubs", 9827 },
+{ "hearts", 9829 },
+{ "diams", 9830 },
+
+#endif // OMEGA_INCLUDED_NAMEDENTITIES_H
diff --git a/src/zim_types.h b/src/zim_types.h
new file mode 100644
index 0000000..7625de2
--- /dev/null
+++ b/src/zim_types.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2018-2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+
+#ifndef ZIM_TYPES_H
+#define ZIM_TYPES_H
+
+#include <zim/zim.h>
+
+#include <ostream>
+
+#ifdef __GNUC__
+#define PACKED __attribute__((packed))
+#else
+#define PACKED
+#endif
+
+
+template<typename B>
+struct REAL_TYPEDEF {
+  typedef B base_type;  // the wrapped type, exposed for the free operators
+  B v;                  // the wrapped value itself
+  REAL_TYPEDEF() : v(0) {}
+  explicit REAL_TYPEDEF(B value) : v(value) {}
+  explicit operator bool() const { return v != 0; }
+  explicit operator B() const { return v; }
+  // Equality compares the wrapped values.
+  bool operator==(const REAL_TYPEDEF& rhs) const
+  { return v == rhs.v; }
+  // Pre-increment: bump the value and yield this object.
+  REAL_TYPEDEF& operator++()
+  { v++; return *this; }
+  // Post-increment: yield a wrapper holding the value before the bump.
+  REAL_TYPEDEF operator++(int)
+  { return REAL_TYPEDEF(v++); }
+} PACKED;
+
+// Arithmetic helpers for wrapper types.  These templates are declared at
+// global scope for any T; their bodies need T to expose a public member `v`
+// (and, for the raw-value overload, a nested `base_type`).
+
+// acc += delta for two wrappers of the same type.
+template<typename T> inline T& operator+= (T& acc, const T& delta)
+{ acc.v += delta.v; return acc; }
+
+// acc += raw value of the wrapper's base_type.
+template<typename T> inline T& operator+= (T& acc, const typename T::base_type& delta)
+{ acc.v += delta; return acc; }
+
+// Sum of two wrappers; the by-value left operand becomes the result.
+template<typename T> inline T operator+(T acc, const T& delta)
+{
+  acc += delta;
+  return acc;
+}
+
+// acc -= delta for two wrappers of the same type.
+template<typename T> inline T& operator-=(T& acc, const T& delta)
+{ acc.v -= delta.v; return acc; }
+
+// Difference of two wrappers, built on operator-=.
+template<typename T> inline T operator-(T acc, const T& delta)
+{
+  acc -= delta;
+  return acc;
+}
+
+// Relational operators for any type T exposing a public member `v`.
+// Only operator< reads `v`; operator>, operator<= and operator>= are each
+// derived from another relational operator, and operator!= negates whatever
+// operator== is found for T.
+template<typename T> inline bool operator< (const T& a, const T& b)
+{ return a.v < b.v; }
+template<typename T> inline bool operator> (const T& a, const T& b)
+{ return b < a; }
+template<typename T> inline bool operator<=(const T& a, const T& b)
+{ return !(a > b); }
+template<typename T> inline bool operator>=(const T& a, const T& b)
+{ return !(a < b); }
+template<typename T> inline bool operator!=(const T& a, const T& b)
+{ return !(a == b); }
+
+
+// Stream a wrapper by streaming its wrapped value `v`.
+template<typename B>
+std::ostream& operator<<(std::ostream& os, const REAL_TYPEDEF<B>& obj)
+{
+    return os << obj.v;
+}
+
+namespace zim {
+
+#define TYPEDEF(NAME, TYPE) struct NAME : public REAL_TYPEDEF<TYPE> { \
+explicit NAME(TYPE v=0) : REAL_TYPEDEF<TYPE>(v) {}; } PACKED; \
+static_assert(sizeof(NAME) == sizeof(TYPE), "");
+// Each use declares a distinct struct NAME wrapping TYPE, same size as TYPE.
+TYPEDEF(entry_index_t, entry_index_type)
+TYPEDEF(title_index_t, entry_index_type)  // same TYPE as entry_index_t
+TYPEDEF(cluster_index_t, cluster_index_type)
+TYPEDEF(blob_index_t, blob_index_type)
+
+TYPEDEF(zsize_t, size_type)
+TYPEDEF(offset_t, offset_type)
+
+#undef TYPEDEF
+
+// Advance an offset in place by a size.
+inline offset_t& operator+= (offset_t& lhs, const zsize_t& rhs)
+{ lhs.v += rhs.v; return lhs; }
+
+// offset + size -> offset.  lhs is received by value, advanced through the
+// offset_t/zsize_t operator+=, and handed back as the result.
+inline offset_t operator+(offset_t lhs, const zsize_t& rhs)
+{
+  lhs += rhs;
+  return lhs;
+}
+
+};
+
+#endif //ZIM_TYPES_H
diff --git a/static/meson.build b/static/meson.build
new file mode 100644
index 0000000..b6c1d7f
--- /dev/null
+++ b/static/meson.build
@@ -0,0 +1,12 @@
+
+resources_list = 'resources_list.txt'
+# Run res_compiler (defined outside this file) to emit the two C++ outputs.
+lib_resources = custom_target('resources',
+  input: resources_list,
+  output: ['libzim-resources.cpp', 'libzim-resources.h'],
+  command:[res_compiler,
+           '--cxxfile', '@OUTPUT0@',
+           '--hfile', '@OUTPUT1@',
+           '--source_dir', '@OUTDIR@',
+           '@INPUT@']
+)
diff --git a/static/resources_list.txt b/static/resources_list.txt
new file mode 100644
index 0000000..fd26c65
--- /dev/null
+++ b/static/resources_list.txt
@@ -0,0 +1,58 @@
+stopwords/af
+stopwords/ar
+stopwords/bg
+stopwords/bn
+stopwords/br
+stopwords/ca
+stopwords/cs
+stopwords/da
+stopwords/de
+stopwords/el
+stopwords/en
+stopwords/eo
+stopwords/es
+stopwords/et
+stopwords/eu
+stopwords/fa
+stopwords/fi
+stopwords/fr
+stopwords/ga
+stopwords/gl
+stopwords/gu
+stopwords/ha
+stopwords/he
+stopwords/hi
+stopwords/hr
+stopwords/hu
+stopwords/hy
+stopwords/id
+stopwords/it
+stopwords/ja
+stopwords/ko
+stopwords/ku
+stopwords/la
+stopwords/lt
+stopwords/lv
+stopwords/mr
+stopwords/ms
+stopwords/nl
+stopwords/no
+stopwords/pl
+stopwords/pt
+stopwords/ro
+stopwords/ru
+stopwords/sk
+stopwords/sl
+stopwords/so
+stopwords/st
+stopwords/sv
+stopwords/sw
+stopwords/th
+stopwords/tl
+stopwords/tr
+stopwords/uk
+stopwords/ur
+stopwords/vi
+stopwords/yo
+stopwords/zh
+stopwords/zu
\ No newline at end of file
diff --git a/static/stopwords/af b/static/stopwords/af
new file mode 100644
index 0000000..b13c76d
--- /dev/null
+++ b/static/stopwords/af
@@ -0,0 +1,51 @@
+'n
+aan
+af
+al
+as
+baie
+by
+daar
+dag
+dat
+die
+dit
+een
+ek
+en
+gaan
+gesê
+haar
+het
+hom
+hulle
+hy
+in
+is
+jou
+jy
+kan
+kom
+ma
+maar
+met
+my
+na
+nie
+om
+ons
+op
+saam
+sal
+se
+sien
+so
+sy
+te
+toe
+uit
+van
+vir
+was
+wat
+ŉ
\ No newline at end of file
diff --git a/static/stopwords/ar b/static/stopwords/ar
new file mode 100644
index 0000000..81173e6
--- /dev/null
+++ b/static/stopwords/ar
@@ -0,0 +1,480 @@
+Ø
+Ø¢Ø¶
+Ø¢ÙÙÙÙ
+Ø¢Ù
+Ø¢ÙØ§Ù
+Ø¢Ù
+Ø£
+Ø£Ø¨
+Ø£Ø¬Ù
+Ø£Ø¬ÙØ¹
+Ø£Ø®
+Ø£Ø®Ø°
+Ø£ØµØ¨Ø­
+Ø£Ø¶Ø­Ù
+Ø£ÙØ¨Ù
+Ø£ÙÙ
+Ø£ÙØ«Ø±
+Ø£ÙØ§
+Ø£Ù
+Ø£ÙØ§
+Ø£ÙØ§ÙÙ
+Ø£ÙØ§ÙÙÙ
+Ø£ÙØ³Ù
+Ø£ÙÙØ§
+Ø£Ù
+Ø£ÙØ§
+Ø£ÙØª
+Ø£ÙØªÙ
+Ø£ÙØªÙØ§
+Ø£ÙØªÙ
+Ø£ÙØªÙ
+Ø£ÙØ´Ø£
+Ø£ÙÙÙ
+Ø£Ù
+Ø£ÙØ´Ù
+Ø£ÙÙØ¦Ù
+Ø£ÙÙØ¦ÙÙ
+Ø£ÙÙØ§Ø¡
+Ø£ÙÙØ§ÙÙ
+Ø£ÙÙÙÙ
+Ø£Ù
+Ø£ÙØ§
+Ø£ÙÙ
+Ø£ÙÙÙØ§
+Ø£ÙÙ
+Ø£ÙÙÙÙ
+Ø£ÙÙÙÙÙ
+Ø£ÙÙÙÙ
+Ø¥Ø°
+Ø¥Ø°Ø§
+Ø¥Ø°Ø§Ù
+Ø¥Ø°ÙØ§
+Ø¥Ø°Ù
+Ø¥ÙÙ
+Ø¥ÙÙÙÙ
+Ø¥ÙÙÙÙØ§
+Ø¥ÙÙÙÙÙ
+Ø¥ÙÙÙÙ
+Ø¥ÙÙÙÙÙÙ
+Ø¥ÙÙØ§
+Ø¥ÙÙØ§
+Ø¥Ù
+Ø¥ÙÙÙØ§
+Ø¥Ù
+Ø¥ÙØ§Ù
+Ø¥ÙØ§ÙÙ
+Ø¥ÙØ§ÙÙØ§
+Ø¥ÙØ§ÙÙ
+Ø¥ÙØ§ÙØ§
+Ø¥ÙØ§Ù
+Ø¥ÙØ§ÙØ§
+Ø¥ÙØ§ÙÙ
+Ø¥ÙØ§ÙÙØ§
+Ø¥ÙØ§ÙÙ
+Ø¥ÙØ§Ù
+Ø¥ÙÙÙ
+Ø¥ÙÙÙÙ
+Ø§
+Ø§Ø¨ØªØ¯Ø£
+Ø§Ø«Ø±
+Ø§Ø¬Ù
+Ø§Ø­Ø¯
+Ø§Ø®Ø±Ù
+Ø§Ø®ÙÙÙÙ
+Ø§Ø°Ø§
+Ø§Ø±Ø¨Ø¹Ø©
+Ø§Ø±ØªØ¯Ù
+Ø§Ø³ØªØ­Ø§Ù
+Ø§Ø·Ø§Ø±
+Ø§Ø¹Ø§Ø¯Ø©
+Ø§Ø¹ÙÙØª
+Ø§Ù
+Ø§ÙØ«Ø±
+Ø§ÙØ¯
+Ø§ÙØ£ÙØ§Ø¡
+Ø§ÙØ£ÙÙ
+Ø§ÙØ§
+Ø§ÙØ§Ø®ÙØ±Ø©
+Ø§ÙØ§Ù
+Ø§ÙØ§ÙÙ
+Ø§ÙØ§ÙÙÙ
+Ø§ÙØªÙ
+Ø§ÙØªÙ
+Ø§ÙØ«Ø§ÙÙ
+Ø§ÙØ«Ø§ÙÙØ©
+Ø§ÙØ°Ø§ØªÙ
+Ø§ÙØ°Ù
+Ø§ÙØ°Ù
+Ø§ÙØ°ÙÙ
+Ø§ÙØ³Ø§Ø¨Ù
+Ø§ÙÙ
+Ø§ÙÙØ§Ø¦Ù
+Ø§ÙÙØ§ØªÙ
+Ø§ÙÙØªØ§Ù
+Ø§ÙÙØªÙØ§
+Ø§ÙÙØªÙÙ
+Ø§ÙÙØ°Ø§Ù
+Ø§ÙÙØ°ÙÙ
+Ø§ÙÙÙØ§ØªÙ
+Ø§ÙÙØ§Ø¶Ù
+Ø§ÙÙÙØ¨Ù
+Ø§ÙÙÙØª
+Ø§ÙÙ
+Ø§ÙÙÙÙ
+Ø§ÙØ§
+Ø§ÙØ§Ù
+Ø§ÙØ³
+Ø§Ù
+Ø§ÙØ¨Ø±Ù
+Ø§ÙÙÙØ¨
+Ø§ÙÙ
+Ø§ÙÙØ§
+Ø§Ù
+Ø§ÙÙ
+Ø§Ù
+Ø§ÙØ§Ø±
+Ø§ÙØ§Ù
+Ø§ÙØ¶Ø§
+Ø¨
+Ø¨Ø§Øª
+Ø¨Ø§Ø³Ù
+Ø¨Ø§Ù
+Ø¨Ø®Ù
+Ø¨Ø±Ø³
+Ø¨Ø³Ø¨Ø¨
+Ø¨Ø³Ù
+Ø¨Ø´ÙÙ
+Ø¨Ø¶Ø¹
+Ø¨Ø·Ø¢Ù
+Ø¨Ø¹Ø¯
+Ø¨Ø¹Ø¶
+Ø¨Ù
+Ø¨ÙÙ
+Ø¨ÙÙØ§
+Ø¨ÙÙ
+Ø¨Ù
+Ø¨ÙÙ
+Ø¨ÙØ§
+Ø¨ÙØ§Ø°Ø§
+Ø¨ÙÙ
+Ø¨Ù
+Ø¨ÙØ§
+Ø¨Ù
+Ø¨ÙØ§
+Ø¨Ù
+Ø¨ÙØ¯
+Ø¨ÙÙ
+Ø¨ÙØ³Ù
+Ø¨ÙÙÙÙÙ
+Ø¨ÙØ¦ÙØ³Ù
+ØªØ§ÙÙ
+ØªØ§ÙÙÙ
+ØªØ¨Ø¯ÙÙ
+ØªØ¬Ø§Ù
+ØªØ­ÙÙÙ
+ØªÙÙØ§Ø¡
+ØªÙÙ
+ØªÙÙÙ
+ØªÙÙÙØ§
+ØªÙ
+ØªÙÙÙ
+ØªÙÙÙÙÙ
+ØªÙÙ
+ØªÙÙ
+Ø«ÙØ§Ø«Ø©
+Ø«Ù
+Ø«ÙÙ
+Ø«ÙÙØ©
+Ø«ÙÙÙÙ
+Ø¬Ø¹Ù
+Ø¬ÙÙ
+Ø¬ÙÙØ¹
+Ø¬ÙØ±
+Ø­Ø§Ø±
+Ø­Ø§Ø´Ø§
+Ø­Ø§ÙÙØ§
+Ø­Ø§Ù
+Ø­ØªÙ
+Ø­Ø±Ù
+Ø­Ø³Ø¨
+Ø­Ù
+Ø­ÙØ§ÙÙ
+Ø­ÙÙ
+Ø­ÙØ«
+Ø­ÙØ«ÙØ§
+Ø­ÙÙ
+Ø­ÙÙÙ
+Ø­ÙØ¨ÙÙØ°ÙØ§
+Ø­ÙØªÙÙÙ
+Ø­ÙØ°Ø§Ø±Ù
+Ø®ÙØ§
+Ø®ÙØ§Ù
+Ø¯ÙÙ
+Ø¯ÙÙÙ
+Ø°Ø§
+Ø°Ø§Øª
+Ø°Ø§Ù
+Ø°Ø§ÙÙ
+Ø°Ø§ÙÙ
+Ø°ÙÙ
+Ø°ÙÙÙ
+Ø°ÙÙÙØ§
+Ø°ÙÙÙ
+Ø°Ù
+Ø°ÙØ§
+Ø°ÙØ§ØªØ§
+Ø°ÙØ§ØªÙ
+Ø°ÙØª
+Ø°ÙÙÙ
+Ø°ÙÙÙÙÙ
+Ø°ÙÙ
+Ø°ÙÙ
+Ø±Ø§Ø­
+Ø±Ø¬Ø¹
+Ø±ÙÙØ¯Ù
+Ø±ÙØ«
+Ø±ÙØ¨ÙÙ
+Ø²ÙØ§Ø±Ø©
+Ø³Ø¨Ø­Ø§Ù
+Ø³Ø±Ø¹Ø§Ù
+Ø³ÙØ©
+Ø³ÙÙØ§Øª
+Ø³ÙÙ
+Ø³ÙÙ
+Ø³ÙØ§Ø¡Ù
+Ø³ÙØ§Ø¡ÙÙÙØ§
+Ø´Ø¨Ù
+Ø´Ø®ØµØ§
+Ø´Ø±Ø¹
+Ø´ÙØªÙÙØ§ÙÙ
+ØµØ§Ø±
+ØµØ¨Ø§Ø­
+ØµÙØ±
+ØµÙÙ
+ØµÙÙ
+Ø¶Ø¯
+Ø¶ÙÙ
+Ø·Ø§Ù
+Ø·Ø§ÙÙØ§
+Ø·ÙÙ
+Ø·ÙÙ
+Ø¸ÙÙ
+Ø¹Ø§Ø¯
+Ø¹Ø§Ù
+Ø¹Ø§ÙØ§
+Ø¹Ø§ÙØ©
+Ø¹Ø¯Ø§
+Ø¹Ø¯Ø©
+Ø¹Ø¯Ø¯
+Ø¹Ø¯Ù
+Ø¹Ø³Ù
+Ø¹Ø´Ø±
+Ø¹Ø´Ø±Ø©
+Ø¹ÙÙ
+Ø¹ÙÙ
+Ø¹ÙÙÙ
+Ø¹ÙÙÙ
+Ø¹ÙÙÙØ§
+Ø¹ÙÙÙ
+Ø¹Ù
+Ø¹ÙØ¯
+Ø¹ÙØ¯ÙØ§
+Ø¹ÙØ¶
+Ø¹ÙÙ
+Ø¹ÙØ¯ÙØ³Ù
+Ø¹ÙÙÙÙØ§
+ØºØ¯Ø§
+ØºÙØ±
+Ù
+Ù
+ÙØ§Ù
+ÙÙØ§Ù
+ÙÙ
+ÙÙ
+ÙÙ
+ÙÙÙ
+ÙÙÙØ§
+ÙÙÙ
+ÙÙÙØ§
+ÙØ§Ù
+ÙØ§Ù
+ÙØ¨Ù
+ÙØ¯
+ÙØ·Ù
+ÙÙÙØ§
+ÙÙØ©
+ÙØ£ÙÙÙØ§
+ÙØ£ÙÙ
+ÙØ£ÙÙ
+ÙØ£ÙÙÙ
+ÙØ§Ø¯
+ÙØ§Ù
+ÙØ§ÙØª
+ÙØ°Ø§
+ÙØ°ÙÙ
+ÙØ±Ø¨
+ÙÙ
+ÙÙØ§
+ÙÙØ§ÙÙØ§
+ÙÙØªØ§
+ÙÙÙ
+ÙÙÙÙÙØ§
+ÙÙÙÙÙØ§
+ÙÙÙÙØ§
+ÙÙÙÙØ§
+ÙÙ
+ÙÙØ§
+ÙÙ
+ÙÙØª
+ÙÙÙ
+ÙÙÙÙØ§
+ÙÙØ£ÙÙÙÙ
+ÙÙØ®
+ÙØ¦Ù
+ÙØ§
+ÙØ§Øª
+ÙØ§Ø³ÙÙØ§
+ÙØ¯Ù
+ÙØ¯Ù
+ÙØ¹ÙØ±
+ÙÙØ§Ø¡
+ÙÙ
+ÙÙÙ
+ÙÙÙØ§
+ÙÙÙ
+ÙÙÙÙÙÙØ§
+ÙÙÙ
+ÙÙÙÙØ§
+ÙÙØ§ÙÙ
+ÙÙ
+ÙÙØ§
+ÙÙÙØ§
+ÙÙ
+ÙÙØ§
+ÙÙ
+ÙÙØ§
+ÙÙ
+ÙÙÙØ§ÙØ©
+ÙÙÙØ§
+ÙÙÙØ§
+ÙÙ
+ÙÙØ³ÙØªÙ
+ÙÙØ³ÙØªÙ
+ÙÙØ³ÙØªÙÙ
+ÙÙØ³ÙØªÙÙÙØ§
+ÙÙØ³ÙØªÙÙÙÙ
+ÙÙØ³ÙØªÙ
+ÙÙØ³ÙÙÙ
+ÙÙØ¹ÙÙÙÙ
+ÙÙÙÙÙÙÙ
+ÙÙÙÙØªÙ
+ÙÙÙÙØ³Ù
+ÙÙÙÙØ³ÙØ§
+ÙÙÙÙØ³ÙØªÙØ§
+ÙÙÙÙØ³ÙØªÙ
+ÙÙÙÙØ³ÙÙØ§
+ÙÙÙØ³ÙÙÙØ§
+ÙØ§
+ÙØ§Ø§ÙÙÙ
+ÙØ§Ø¨Ø±Ø­
+ÙØ§Ø¯Ø§Ù
+ÙØ§Ø°Ø§
+ÙØ§Ø²Ø§Ù
+ÙØ§ÙØªØ¦
+ÙØ§ÙÙ
+ÙØªÙ
+ÙØ«Ù
+ÙØ°
+ÙØ³Ø§Ø¡
+ÙØ¹
+ÙØ¹Ø§Ø°
+ÙÙØ§Ø¨Ù
+ÙÙØ§ÙÙÙ
+ÙÙØ§ÙÙÙØ§
+ÙÙØ§ÙÙÙÙ
+ÙÙØ§ÙÙÙ
+ÙÙÙØ§Ø±
+ÙÙÙÙÙ
+ÙÙØ§
+ÙÙÙ
+ÙÙ
+ÙÙØ°
+ÙÙÙØ§
+ÙÙ
+ÙÙÙØ§
+ÙÙÙÙ
+ÙÙÙ
+ÙØ­Ù
+ÙØ­Ù
+ÙØ¹Ù
+ÙÙØ³
+ÙÙØ³Ù
+ÙÙØ§ÙØ©
+ÙÙØ®Ù
+ÙÙØ¹ÙÙÙØ§
+ÙÙØ¹ÙÙÙ
+ÙØ§
+ÙØ§Ø¤Ù
+ÙØ§ÙÙ
+ÙØ§ÙÙØ§
+ÙØ¨Ù
+ÙØ°Ø§
+ÙØ°Ù
+ÙÙØ°Ø§
+ÙÙ
+ÙÙÙÙÙ
+ÙÙÙØ§
+ÙÙ
+ÙÙØ§
+ÙÙ
+ÙÙØ§
+ÙÙØ§Ù
+ÙÙØ§ÙÙ
+ÙÙ
+ÙÙ
+ÙÙØ§
+ÙÙØª
+ÙÙÙØ§
+ÙÙØ¤ÙØ§Ø¡
+ÙÙØ§ØªØ§ÙÙ
+ÙÙØ§ØªÙÙÙÙÙ
+ÙÙØ§ØªÙÙ
+ÙÙØ§ØªÙÙ
+ÙÙØ¬Ù
+ÙÙØ°Ø§
+ÙÙØ°Ø§ÙÙ
+ÙÙØ°ÙÙÙÙÙ
+ÙÙØ°ÙÙ
+ÙÙØ°ÙÙ
+ÙÙÙÙÙÙØ§ØªÙ
+Ù
+Ù6
+ÙØ§
+ÙØ§Ø­Ø¯
+ÙØ§Ø¶Ø§Ù
+ÙØ§Ø¶Ø§ÙØª
+ÙØ§ÙØ¯
+ÙØ§Ù
+ÙØ§ÙØ§Ù
+ÙØ§ÙØ¶Ø­
+ÙØ±Ø§Ø¡ÙÙ
+ÙÙÙ
+ÙÙØ§Ù
+ÙÙØ§ÙØª
+ÙÙØ¯
+ÙÙÙ
+ÙÙØ§Ù
+ÙÙØ§ÙØª
+ÙÙØ§
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙØ£ÙÙ
+ÙÙÙÙ
+ÙÙØ´ÙÙÙØ§ÙÙÙ
+ÙÙÙÙ
+ÙÙÙÙ
+ÙÙÙ
+ÙØ£ÙÙØ§Ù
\ No newline at end of file
diff --git a/static/stopwords/bg b/static/stopwords/bg
new file mode 100644
index 0000000..eebe119
--- /dev/null
+++ b/static/stopwords/bg
@@ -0,0 +1,259 @@
+Ð°
+Ð°Ð²ÑÐµÐ½ÑÐ¸ÑÐµÐ½
+Ð°Ð·
+Ð°ÐºÐ¾
+Ð°Ð»Ð°
+Ð±Ðµ
+Ð±ÐµÐ·
+Ð±ÐµÑÐµ
+Ð±Ð¸
+Ð±Ð¸Ð²Ñ
+Ð±Ð¸Ð²ÑÐ°
+Ð±Ð¸Ð²ÑÐ¾
+Ð±Ð¸Ð»
+Ð±Ð¸Ð»Ð°
+Ð±Ð¸Ð»Ð¸
+Ð±Ð¸Ð»Ð¾
+Ð±Ð»Ð°Ð³Ð¾Ð´Ð°ÑÑ
+Ð±Ð»Ð¸Ð·Ð¾
+Ð±ÑÐ´Ð°Ñ
+Ð±ÑÐ´Ðµ
+Ð±ÑÑÐ°
+Ð²
+Ð²Ð°Ñ
+Ð²Ð°Ñ
+Ð²Ð°ÑÐ°
+Ð²ÐµÑÐ¾ÑÑÐ½Ð¾
+Ð²ÐµÑÐµ
+Ð²Ð·ÐµÐ¼Ð°
+Ð²Ð¸
+Ð²Ð¸Ðµ
+Ð²Ð¸Ð½Ð°Ð³Ð¸
+Ð²Ð½Ð¸Ð¼Ð°Ð²Ð°
+Ð²ÑÐµÐ¼Ðµ
+Ð²ÑÐµ
+Ð²ÑÐµÐºÐ¸
+Ð²ÑÐ¸ÑÐºÐ¸
+Ð²ÑÐ¸ÑÐºÐ¾
+Ð²ÑÑÐºÐ°
+Ð²ÑÐ²
+Ð²ÑÐ¿ÑÐµÐºÐ¸
+Ð²ÑÑÑÑ
+Ð³
+Ð³Ð¸
+Ð³Ð»Ð°Ð²ÐµÐ½
+Ð³Ð»Ð°Ð²Ð½Ð°
+Ð³Ð»Ð°Ð²Ð½Ð¾
+Ð³Ð»Ð°Ñ
+Ð³Ð¾
+Ð³Ð¾Ð´Ð¸Ð½Ð°
+Ð³Ð¾Ð´Ð¸Ð½Ð¸
+Ð³Ð¾Ð´Ð¸ÑÐµÐ½
+Ð´
+Ð´Ð°
+Ð´Ð°Ð»Ð¸
+Ð´Ð²Ð°
+Ð´Ð²Ð°Ð¼Ð°
+Ð´Ð²Ð°Ð¼Ð°ÑÐ°
+Ð´Ð²Ðµ
+Ð´Ð²ÐµÑÐµ
+Ð´ÐµÐ½
+Ð´Ð½ÐµÑ
+Ð´Ð½Ð¸
+Ð´Ð¾
+Ð´Ð¾Ð±ÑÐ°
+Ð´Ð¾Ð±ÑÐµ
+Ð´Ð¾Ð±ÑÐ¾
+Ð´Ð¾Ð±ÑÑ
+Ð´Ð¾ÐºÐ°ÑÐ¾
+Ð´Ð¾ÐºÐ¾Ð³Ð°
+Ð´Ð¾ÑÐ¸
+Ð´Ð¾ÑÐµÐ³Ð°
+Ð´Ð¾ÑÑÐ°
+Ð´ÑÑÐ³
+Ð´ÑÑÐ³Ð°
+Ð´ÑÑÐ³Ð¸
+Ðµ
+ÐµÐ²ÑÐ¸Ð½
+ÐµÐ´Ð²Ð°
+ÐµÐ´Ð¸Ð½
+ÐµÐ´Ð½Ð°
+ÐµÐ´Ð½Ð°ÐºÐ²Ð°
+ÐµÐ´Ð½Ð°ÐºÐ²Ð¸
+ÐµÐ´Ð½Ð°ÐºÑÐ²
+ÐµÐ´Ð½Ð¾
+ÐµÐºÐ¸Ð¿
+ÐµÑÐ¾
+Ð¶Ð¸Ð²Ð¾Ñ
+Ð·Ð°
+Ð·Ð°Ð±Ð°Ð²ÑÐ¼
+Ð·Ð°Ð´
+Ð·Ð°ÐµÐ´Ð½Ð¾
+Ð·Ð°ÑÐ°Ð´Ð¸
+Ð·Ð°ÑÐµÐ³Ð°
+Ð·Ð°ÑÐ¿Ð°Ð»
+Ð·Ð°ÑÐ¾Ð²Ð°
+Ð·Ð°ÑÐ¾
+Ð·Ð°ÑÐ¾ÑÐ¾
+Ð¸
+Ð¸Ð·
+Ð¸Ð»Ð¸
+Ð¸Ð¼
+Ð¸Ð¼Ð°
+Ð¸Ð¼Ð°Ñ
+Ð¸ÑÐºÐ°
+Ð¹
+ÐºÐ°Ð·Ð°
+ÐºÐ°Ðº
+ÐºÐ°ÐºÐ²Ð°
+ÐºÐ°ÐºÐ²Ð¾
+ÐºÐ°ÐºÑÐ¾
+ÐºÐ°ÐºÑÐ²
+ÐºÐ°ÑÐ¾
+ÐºÐ¾Ð³Ð°
+ÐºÐ¾Ð³Ð°ÑÐ¾
+ÐºÐ¾ÐµÑÐ¾
+ÐºÐ¾Ð¸ÑÐ¾
+ÐºÐ¾Ð¹
+ÐºÐ¾Ð¹ÑÐ¾
+ÐºÐ¾Ð»ÐºÐ¾
+ÐºÐ¾ÑÑÐ¾
+ÐºÑÐ´Ðµ
+ÐºÑÐ´ÐµÑÐ¾
+ÐºÑÐ¼
+Ð»ÐµÑÐµÐ½
+Ð»ÐµÑÐ½Ð¾
+Ð»Ð¸
+Ð»Ð¾Ñ
+Ð¼
+Ð¼Ð°Ð¹
+Ð¼Ð°Ð»ÐºÐ¾
+Ð¼Ðµ
+Ð¼ÐµÐ¶Ð´Ñ
+Ð¼ÐµÐº
+Ð¼ÐµÐ½
+Ð¼ÐµÑÐµÑ
+Ð¼Ð¸
+Ð¼Ð½Ð¾Ð³Ð¾
+Ð¼Ð½Ð¾Ð·Ð¸Ð½Ð°
+Ð¼Ð¾Ð³Ð°
+Ð¼Ð¾Ð³Ð°Ñ
+Ð¼Ð¾Ð¶Ðµ
+Ð¼Ð¾ÐºÑÑ
+Ð¼Ð¾Ð»Ñ
+Ð¼Ð¾Ð¼ÐµÐ½ÑÐ°
+Ð¼Ñ
+Ð½
+Ð½Ð°
+Ð½Ð°Ð´
+Ð½Ð°Ð·Ð°Ð´
+Ð½Ð°Ð¹
+Ð½Ð°Ð¿ÑÐ°Ð²Ð¸
+Ð½Ð°Ð¿ÑÐµÐ´
+Ð½Ð°Ð¿ÑÐ¸Ð¼ÐµÑ
+Ð½Ð°Ñ
+Ð½Ðµ
+Ð½ÐµÐ³Ð¾
+Ð½ÐµÑÐ¾
+Ð½ÐµÑ
+Ð½Ð¸
+Ð½Ð¸Ðµ
+Ð½Ð¸ÐºÐ¾Ð¹
+Ð½Ð¸ÑÐ¾
+Ð½Ð¸ÑÐ¾
+Ð½Ð¾
+Ð½Ð¾Ð²
+Ð½Ð¾Ð²Ð°
+Ð½Ð¾Ð²Ð¸
+Ð½Ð¾Ð²Ð¸Ð½Ð°
+Ð½ÑÐºÐ¾Ð¸
+Ð½ÑÐºÐ¾Ð¹
+Ð½ÑÐºÐ¾Ð»ÐºÐ¾
+Ð½ÑÐ¼Ð°
+Ð¾Ð±Ð°ÑÐµ
+Ð¾ÐºÐ¾Ð»Ð¾
+Ð¾ÑÐ²ÐµÐ½
+Ð¾ÑÐ¾Ð±ÐµÐ½Ð¾
+Ð¾Ñ
+Ð¾ÑÐ³Ð¾ÑÐµ
+Ð¾ÑÐ½Ð¾Ð²Ð¾
+Ð¾ÑÐµ
+Ð¿Ð°Ðº
+Ð¿Ð¾
+Ð¿Ð¾Ð²ÐµÑÐµ
+Ð¿Ð¾Ð²ÐµÑÐµÑÐ¾
+Ð¿Ð¾Ð´
+Ð¿Ð¾Ð½Ðµ
+Ð¿Ð¾ÑÐ°Ð´Ð¸
+Ð¿Ð¾ÑÐ»Ðµ
+Ð¿Ð¾ÑÑÐ¸
+Ð¿ÑÐ°Ð²Ð¸
+Ð¿ÑÐµÐ´
+Ð¿ÑÐµÐ´Ð¸
+Ð¿ÑÐµÐ·
+Ð¿ÑÐ¸
+Ð¿ÑÐº
+Ð¿ÑÑÐ²Ð°ÑÐ°
+Ð¿ÑÑÐ²Ð¸
+Ð¿ÑÑÐ²Ð¾
+Ð¿ÑÑÐ¸
+ÑÐ°Ð²ÐµÐ½
+ÑÐ°Ð²Ð½Ð°
+Ñ
+ÑÐ°
+ÑÐ°Ð¼
+ÑÐ°Ð¼Ð¾
+ÑÐµ
+ÑÐµÐ³Ð°
+ÑÐ¸
+ÑÐ¸Ð½
+ÑÐºÐ¾ÑÐ¾
+ÑÐ»ÐµÐ´
+ÑÐ»ÐµÐ´Ð²Ð°Ñ
+ÑÐ¼Ðµ
+ÑÐ¼ÑÑ
+ÑÐ¿Ð¾ÑÐµÐ´
+ÑÑÐµÐ´
+ÑÑÐµÑÑ
+ÑÑÐµ
+ÑÑÐ¼
+ÑÑÑ
+ÑÑÑÐ¾
+Ñ
+Ñ.Ð½.
+ÑÐ°Ð·Ð¸
+ÑÐ°ÐºÐ°
+ÑÐ°ÐºÐ¸Ð²Ð°
+ÑÐ°ÐºÑÐ²
+ÑÐ°Ð¼
+ÑÐ²Ð¾Ð¹
+ÑÐµ
+ÑÐµÐ·Ð¸
+ÑÐ¸
+ÑÐ¾
+ÑÐ¾Ð²Ð°
+ÑÐ¾Ð³Ð°Ð²Ð°
+ÑÐ¾Ð·Ð¸
+ÑÐ¾Ð¹
+ÑÐ¾Ð»ÐºÐ¾Ð²Ð°
+ÑÐ¾ÑÐ½Ð¾
+ÑÑÐ¸
+ÑÑÑÐ±Ð²Ð°
+ÑÑÐº
+ÑÑÐ¹
+ÑÑ
+ÑÑÑ
+Ñ
+ÑÑÑÐµ
+ÑÐ°ÑÐµÑÐ²Ð°
+ÑÐ¸Ð»ÑÐ´Ð¸
+Ñ
+ÑÐ°ÑÐ°
+ÑÐµ
+ÑÐµÑÑÐ¾
+ÑÑÐµÐ·
+ÑÐµ
+ÑÐ¾Ð¼
+ÑÐ¼ÑÑÐº
+Ñ
+ÑÐº
\ No newline at end of file
diff --git a/static/stopwords/bn b/static/stopwords/bn
new file mode 100644
index 0000000..9dc1bfc
--- /dev/null
+++ b/static/stopwords/bn
@@ -0,0 +1,398 @@
+à¦à¦¤à¦à¦¬
+à¦à¦¥à¦
+à¦à¦¥à¦¬à¦¾
+à¦à¦¨à§à¦¯à¦¾à¦¯à¦¼à§
+à¦à¦¨à§à¦
+à¦à¦¨à§à¦à§
+à¦à¦¨à§à¦à§à¦
+à¦à¦¨à§à¦¤à¦¤
+à¦à¦¨à§à¦¯
+à¦à¦¬à¦§à¦¿
+à¦à¦¬à¦¶à§à¦¯
+à¦à¦°à§à¦¥à¦¾à¦¤
+à¦à¦
+à¦à¦à¦¾à¦®à§
+à¦à¦à§
+à¦à¦à§à¦
+à¦à¦à§
+à¦à¦
+à¦à¦¦à§à¦¯à¦­à¦¾à¦à§
+à¦à¦ªà¦¨à¦¾à¦°
+à¦à¦ªà¦¨à¦¿
+à¦à¦¬à¦¾à¦°
+à¦à¦®à¦°à¦¾
+à¦à¦®à¦¾à¦à§
+à¦à¦®à¦¾à¦¦à§à¦°
+à¦à¦®à¦¾à¦°
+à¦à¦®à¦¿
+à¦à¦°
+à¦à¦°à¦
+à¦
+à¦à¦¤à§à¦¯à¦¾à¦¦à¦¿
+à¦à¦¹à¦¾
+à¦à¦à¦¿à¦¤
+à¦à¦¤à§à¦¤à¦°
+à¦à¦¨à¦¿
+à¦à¦ªà¦°
+à¦à¦ªà¦°à§
+à¦
+à¦à¦à¦¦à§à¦°
+à¦à¦à¦°à¦¾
+à¦à¦
+à¦à¦à¦
+à¦à¦à¦à¦¿
+à¦à¦à¦¬à¦¾à¦°
+à¦à¦à§
+à¦à¦à§
+à¦à¦à¦¨
+à¦à¦à¦¨à¦
+à¦à¦à¦¾à¦¨à§
+à¦à¦à¦¾à¦¨à§à¦
+à¦à¦à¦¾
+à¦à¦à¦¾à¦
+à¦à¦à¦¿
+à¦à¦¤
+à¦à¦¤à¦à¦¾à¦
+à¦à¦¤à§
+à¦à¦¦à§à¦°
+à¦à¦¬
+à¦à¦¬à¦
+à¦à¦¬à¦¾à¦°
+à¦à¦®à¦¨
+à¦à¦®à¦¨à¦à§
+à¦à¦®à¦¨à¦¿
+à¦à¦°
+à¦à¦°à¦¾
+à¦à¦²
+à¦à¦¸
+à¦à¦¸à§
+à¦
+à¦
+à¦à¦à¦¦à§à¦°
+à¦à¦à¦°
+à¦à¦à¦°à¦¾
+à¦à¦
+à¦à¦à§
+à¦à¦à¦¾à¦¨à§
+à¦à¦¦à§à¦°
+à¦à¦°
+à¦à¦°à¦¾
+à¦à¦à¦¨à¦
+à¦à¦¤
+à¦à¦¬à§
+à¦à¦®à¦¨à§
+à¦à¦¯à¦¼à§à¦
+à¦à¦¯à¦¼à§à¦à¦à¦¿
+à¦à¦°à¦à§
+à¦à¦°à¦à§à¦¨
+à¦à¦°à¦¤à§
+à¦à¦°à¦¬à§
+à¦à¦°à¦¬à§à¦¨
+à¦à¦°à¦²à§
+à¦à¦°à¦²à§à¦¨
+à¦à¦°à¦¾
+à¦à¦°à¦¾à¦
+à¦à¦°à¦¾à¦¯à¦¼
+à¦à¦°à¦¾à¦°
+à¦à¦°à¦¿
+à¦à¦°à¦¿à¦¤à§
+à¦à¦°à¦¿à¦¯à¦¼à¦¾
+à¦à¦°à¦¿à¦¯à¦¼à§
+à¦à¦°à§
+à¦à¦°à§à¦
+à¦à¦°à§à¦à¦¿à¦²à§à¦¨
+à¦à¦°à§à¦à§
+à¦à¦°à§à¦à§à¦¨
+à¦à¦°à§à¦¨
+à¦à¦¾à¦à¦à§
+à¦à¦¾à¦
+à¦à¦¾à¦à§
+à¦à¦¾à¦
+à¦à¦¾à¦à§
+à¦à¦¾à¦°à¦
+à¦à¦¾à¦°à¦£
+à¦à¦¿
+à¦à¦¿à¦à¦¬à¦¾
+à¦à¦¿à¦à§
+à¦à¦¿à¦à§à¦
+à¦à¦¿à¦¨à§à¦¤à§
+à¦à§
+à¦à§
+à¦à§à¦
+à¦à§à¦à¦
+à¦à§à¦à¦¾
+à¦à§à¦¨
+à¦à§à¦à¦¿
+à¦à§à¦¨
+à¦à§à¦¨à¦
+à¦à§à¦¨à§
+à¦à§à¦·à§à¦¤à§à¦°à§
+à¦à§à§à¦
+à¦à§à¦¬
+à¦à¦¿à¦¯à¦¼à§
+à¦à¦¿à¦¯à¦¼à§à¦à§
+à¦à¦¿à§à§
+à¦à§à¦²à¦¿
+à¦à§à¦à§
+à¦à§à¦²
+à¦à§à¦²à§
+à¦à§à¦à¦¾
+à¦à¦²à§
+à¦à¦¾à¦¨
+à¦à¦¾à¦¯à¦¼
+à¦à¦¾à¦°
+à¦à¦¾à¦²à§
+à¦à§à¦¯à¦¼à§
+à¦à§à¦·à§à¦à¦¾
+à¦à¦¾à¦¡à¦¼à¦¾
+à¦à¦¾à¦¡à¦¼à¦¾à¦
+à¦à¦¿à¦²
+à¦à¦¿à¦²à§à¦¨
+à¦à¦¨
+à¦à¦¨à¦à§
+à¦à¦¨à§à¦°
+à¦à¦¨à§à¦¯
+à¦à¦¨à§à¦¯à¦à¦à§
+à¦à¦¾à¦¨à¦¤à§
+à¦à¦¾à¦¨à¦¾
+à¦à¦¾à¦¨à¦¾à¦¨à§
+à¦à¦¾à¦¨à¦¾à¦¯à¦¼
+à¦à¦¾à¦¨à¦¿à¦¯à¦¼à§
+à¦à¦¾à¦¨à¦¿à¦¯à¦¼à§à¦à§
+à¦à§
+à¦à§à¦¨à¦à¦¨
+à¦à¦¿
+à¦ à¦¿à¦
+à¦¤à¦à¦¨
+à¦¤à¦¤
+à¦¤à¦¥à¦¾
+à¦¤à¦¬à§
+à¦¤à¦¬à§
+à¦¤à¦¾
+à¦¤à¦¾à¦à¦à§
+à¦¤à¦¾à¦à¦¦à§à¦°
+à¦¤à¦¾à¦à¦°
+à¦¤à¦¾à¦à¦°à¦¾
+à¦¤à¦¾à¦à¦¾à¦¹à¦¾à¦°à¦¾
+à¦¤à¦¾à¦
+à¦¤à¦¾à¦
+à¦¤à¦¾à¦à§
+à¦¤à¦¾à¦¤à§
+à¦¤à¦¾à¦¦à§à¦°
+à¦¤à¦¾à¦°
+à¦¤à¦¾à¦°à¦ªà¦°
+à¦¤à¦¾à¦°à¦¾
+à¦¤à¦¾à¦°à§
+à¦¤à¦¾à¦¹à¦²à§
+à¦¤à¦¾à¦¹à¦¾
+à¦¤à¦¾à¦¹à¦¾à¦¤à§
+à¦¤à¦¾à¦¹à¦¾à¦°
+à¦¤à¦¿à¦¨à¦
+à¦¤à¦¿à¦¨à¦¿
+à¦¤à¦¿à¦¨à¦¿à¦
+à¦¤à§à¦®à¦¿
+à¦¤à§à¦²à§
+à¦¤à§à¦®à¦¨
+à¦¤à§
+à¦¤à§à¦®à¦¾à¦°
+à¦¥à¦¾à¦à¦¬à§
+à¦¥à¦¾à¦à¦¬à§à¦¨
+à¦¥à¦¾à¦à¦¾
+à¦¥à¦¾à¦à¦¾à¦¯à¦¼
+à¦¥à¦¾à¦à§
+à¦¥à¦¾à¦à§à¦¨
+à¦¥à§à¦à§
+à¦¥à§à¦à§à¦
+à¦¥à§à¦à§à¦
+à¦¦à¦¿à¦à§
+à¦¦à¦¿à¦¤à§
+à¦¦à¦¿à¦¨
+à¦¦à¦¿à¦¯à¦¼à§
+à¦¦à¦¿à¦¯à¦¼à§à¦à§
+à¦¦à¦¿à¦¯à¦¼à§à¦à§à¦¨
+à¦¦à¦¿à¦²à§à¦¨
+à¦¦à§
+à¦¦à§à¦
+à¦¦à§à¦à¦¿
+à¦¦à§à¦à§
+à¦¦à§à¦à¦¯à¦¼à¦¾
+à¦¦à§à¦à¦¯à¦¼à¦¾à¦°
+à¦¦à§à¦à§à¦¾
+à¦¦à§à¦à¦¤à§
+à¦¦à§à¦à¦¾
+à¦¦à§à¦à§
+à¦¦à§à¦¨
+à¦¦à§à¦¯à¦¼
+à¦¦à§à¦¬à¦¾à¦°à¦¾
+à¦§à¦°à¦¾
+à¦§à¦°à§
+à¦§à¦¾à¦®à¦¾à¦°
+à¦¨à¦¤à§à¦¨
+à¦¨à¦¯à¦¼
+à¦¨à¦¾
+à¦¨à¦¾à¦
+à¦¨à¦¾à¦à¦¿
+à¦¨à¦¾à¦à¦¾à¦¦
+à¦¨à¦¾à¦¨à¦¾
+à¦¨à¦¿à¦à§
+à¦¨à¦¿à¦à§à¦
+à¦¨à¦¿à¦à§à¦¦à§à¦°
+à¦¨à¦¿à¦à§à¦°
+à¦¨à¦¿à¦¤à§
+à¦¨à¦¿à¦¯à¦¼à§
+à¦¨à¦¿à§à§
+à¦¨à§à¦
+à¦¨à§à¦à¦¯à¦¼à¦¾
+à¦¨à§à¦à¦¯à¦¼à¦¾à¦°
+à¦¨à§à¦à§à¦¾
+à¦¨à§
+à¦ªà¦à§à¦·à§
+à¦ªà¦°
+à¦ªà¦°à§
+à¦ªà¦°à§à¦
+à¦ªà¦°à§à¦
+à¦ªà¦°à§à¦¯à¦¨à§à¦¤
+à¦ªà¦¾à¦à¦¯à¦¼à¦¾
+à¦ªà¦¾à¦
+à¦ªà¦¾à¦°à¦¿
+à¦ªà¦¾à¦°à§
+à¦ªà¦¾à¦°à§à¦¨
+à¦ªà¦¿
+à¦ªà§à¦¯à¦¼à§
+à¦ªà§à§à§à¦°à§
+à¦ªà§à¦°à¦¤à¦¿
+à¦ªà§à¦°à¦¥à¦®
+à¦ªà§à¦°à¦­à§à¦¤à¦¿
+à¦ªà§à¦°à¦¯à¦¨à§à¦¤
+à¦ªà§à¦°à¦¾à¦¥à¦®à¦¿à¦
+à¦ªà§à¦°à¦¾à¦¯à¦¼
+à¦ªà§à¦°à¦¾à§
+à¦«à¦²à§
+à¦«à¦¿à¦°à§
+à¦«à§à¦°
+à¦¬à¦à§à¦¤à¦¬à§à¦¯
+à¦¬à¦¦à¦²à§
+à¦¬à¦¨
+à¦¬à¦°à¦
+à¦¬à¦²à¦¤à§
+à¦¬à¦²à¦²
+à¦¬à¦²à¦²à§à¦¨
+à¦¬à¦²à¦¾
+à¦¬à¦²à§
+à¦¬à¦²à§à¦à§à¦¨
+à¦¬à¦²à§à¦¨
+à¦¬à¦¸à§
+à¦¬à¦¹à§
+à¦¬à¦¾
+à¦¬à¦¾à¦¦à§
+à¦¬à¦¾à¦°
+à¦¬à¦¿
+à¦¬à¦¿à¦¨à¦¾
+à¦¬à¦¿à¦­à¦¿à¦¨à§à¦¨
+à¦¬à¦¿à¦¶à§à¦·
+à¦¬à¦¿à¦·à¦¯à¦¼à¦à¦¿
+à¦¬à§à¦¶
+à¦¬à§à¦¶à¦¿
+à¦¬à§à¦¯à¦¬à¦¹à¦¾à¦°
+à¦¬à§à¦¯à¦¾à¦ªà¦¾à¦°à§
+à¦­à¦¾à¦¬à§
+à¦­à¦¾à¦¬à§à¦
+à¦®à¦¤à§
+à¦®à¦¤à§à¦
+à¦®à¦§à§à¦¯à¦­à¦¾à¦à§
+à¦®à¦§à§à¦¯à§
+à¦®à¦§à§à¦¯à§à¦
+à¦®à¦§à§à¦¯à§à¦
+à¦®à¦¨à§
+à¦®à¦¾à¦¤à§à¦°
+à¦®à¦¾à¦§à§à¦¯à¦®à§
+à¦®à§à¦
+à¦®à§à¦à§à¦
+à¦¯à¦à¦¨
+à¦¯à¦¤
+à¦¯à¦¤à¦à¦¾
+à¦¯à¦¥à§à¦·à§à¦
+à¦¯à¦¦à¦¿
+à¦¯à¦¦à¦¿à¦
+à¦¯à¦¾
+à¦¯à¦¾à¦à¦°
+à¦¯à¦¾à¦à¦°à¦¾
+à¦¯à¦¾à¦à¦¯à¦¼à¦¾
+à¦¯à¦¾à¦à¦¯à¦¼à¦¾à¦°
+à¦¯à¦¾à¦à§à¦¾
+à¦¯à¦¾à¦à§
+à¦¯à¦¾à¦à§à¦à§
+à¦¯à¦¾à¦¤à§
+à¦¯à¦¾à¦¦à§à¦°
+à¦¯à¦¾à¦¨
+à¦¯à¦¾à¦¬à§
+à¦¯à¦¾à¦¯à¦¼
+à¦¯à¦¾à¦°
+à¦¯à¦¾à¦°à¦¾
+à¦¯à¦¿à¦¨à¦¿
+à¦¯à§
+à¦¯à§à¦à¦¾à¦¨à§
+à¦¯à§à¦¤à§
+à¦¯à§à¦¨
+à¦¯à§à¦®à¦¨
+à¦°
+à¦°à¦à¦®
+à¦°à¦¯à¦¼à§à¦à§
+à¦°à¦¾à¦à¦¾
+à¦°à§à¦à§
+à¦²à¦à§à¦·
+à¦¶à§à¦§à§
+à¦¶à§à¦°à§
+à¦¸à¦à§à¦à§
+à¦¸à¦à§à¦à§à¦
+à¦¸à¦¬
+à¦¸à¦¬à¦¾à¦°
+à¦¸à¦®à¦¸à§à¦¤
+à¦¸à¦®à§à¦ªà§à¦°à¦¤à¦¿
+à¦¸à¦¹
+à¦¸à¦¹à¦¿à¦¤
+à¦¸à¦¾à¦§à¦¾à¦°à¦£
+à¦¸à¦¾à¦®à¦¨à§
+à¦¸à¦¿
+à¦¸à§à¦¤à¦°à¦¾à¦
+à¦¸à§
+à¦¸à§à¦
+à¦¸à§à¦à¦¾à¦¨
+à¦¸à§à¦à¦¾à¦¨à§
+à¦¸à§à¦à¦¾
+à¦¸à§à¦à¦¾à¦
+à¦¸à§à¦à¦¾à¦
+à¦¸à§à¦à¦¿
+à¦¸à§à¦ªà¦·à§à¦
+à¦¸à§à¦¬à¦¯à¦¼à¦
+à¦¹à¦à¦¤à§
+à¦¹à¦à¦¬à§
+à¦¹à¦à¦¯à¦¼à¦¾
+à¦¹à¦à¦¯à¦¼à¦¾
+à¦¹à¦à¦¯à¦¼à¦¾à¦¯à¦¼
+à¦¹à¦à¦¯à¦¼à¦¾à¦°
+à¦¹à¦à§à¦à§
+à¦¹à¦¤
+à¦¹à¦¤à§
+à¦¹à¦¤à§à¦
+à¦¹à¦¨
+à¦¹à¦¬à§
+à¦¹à¦¬à§à¦¨
+à¦¹à¦¯à¦¼
+à¦¹à¦¯à¦¼à¦¤à§
+à¦¹à¦¯à¦¼à¦¨à¦¿
+à¦¹à¦¯à¦¼à§
+à¦¹à¦¯à¦¼à§à¦
+à¦¹à¦¯à¦¼à§à¦à¦¿à¦²
+à¦¹à¦¯à¦¼à§à¦à§
+à¦¹à¦¯à¦¼à§à¦à§à¦¨
+à¦¹à¦²
+à¦¹à¦²à§
+à¦¹à¦²à§à¦
+à¦¹à¦²à§à¦
+à¦¹à¦²à§
+à¦¹à¦¾à¦à¦¾à¦°
+à¦¹à¦¿à¦¸à¦¾à¦¬à§
+à¦¹à§à¦²à§
+à¦¹à§à¦
+à¦¹à§
\ No newline at end of file
diff --git a/static/stopwords/br b/static/stopwords/br
new file mode 100644
index 0000000..ee9addc
--- /dev/null
+++ b/static/stopwords/br
@@ -0,0 +1,1203 @@
+'blam
+'d
+'m
+'r
+'ta
+'vat
+'z
+'zo
+a
+a:
+aba
+abalamour
+abaoe
+ac'hane
+ac'hanoc'h
+ac'hanomp
+ac'hanon
+ac'hanout
+adal
+adalek
+adarre
+ae
+aec'h
+aed
+aemp
+aen
+aent
+aes
+afe
+afec'h
+afed
+afemp
+afen
+afent
+afes
+ag
+ah
+aimp
+aint
+aio
+aiou
+aje
+ajec'h
+ajed
+ajemp
+ajen
+ajent
+ajes
+al
+alato
+alies
+aliesaÃ±
+alkent
+all
+allas
+allo
+allÃ´
+am
+amaÃ±
+amzer
+an
+anezhaÃ±
+anezhe
+anezhi
+anezho
+anvet
+aon
+aotren
+ar
+arall
+araok
+araoki
+araozaÃ±
+araozo
+araozoc'h
+araozomp
+araozon
+araozor
+araozout
+arbenn
+arre
+atalek
+atav
+az
+azalek
+azirazaÃ±
+azirazi
+azirazo
+azirazoc'h
+azirazomp
+azirazon
+azirazor
+azirazout
+b:
+ba
+ba'l
+ba'n
+ba'r
+bad
+bah
+bal
+ban
+bar
+bastaÃ±
+befe
+bell
+benaos
+benn
+bennag
+bennak
+bennozh
+bep
+bepred
+berr
+berzh
+bet
+betek
+betra
+bev
+bevet
+bez
+bezaÃ±
+beze
+bezent
+bezet
+bezh
+bezit
+bezomp
+bihan
+bije
+biou
+biskoazh
+blam
+bo
+boa
+bominapl
+boudoudom
+bouez
+boull
+boum
+bout
+bras
+brasaÃ±
+brav
+bravo
+bremaÃ±
+bres
+brokenn
+bronn
+brrr
+brutal
+buhezek
+c'h:
+c'haout
+c'he
+c'hem
+c'herz
+c'heÃ±ver
+c'hichen
+c'hiz
+c'hoazh
+c'horre
+c'houde
+c'houst
+c'hreiz
+c'hwec'h
+c'hwec'hvet
+c'hwezek
+c'hwi
+ch:
+chaous
+chik
+chit
+chom
+chut
+d'
+d'al
+d'an
+d'ar
+d'az
+d'e
+d'he
+d'ho
+d'hol
+d'hon
+d'hor
+d'o
+d'ober
+d'ul
+d'un
+d'ur
+d:
+da
+dak
+daka
+dal
+dalbezh
+dalc'hmat
+dalit
+damdost
+damheÃ±vel
+damm
+dan
+danvez
+dao
+daol
+daonet
+daou
+daoust
+daouzek
+daouzekvet
+darn
+dastrewiÃ±
+dav
+davedoc'h
+davedomp
+davedon
+davedor
+davedout
+davet
+davetaÃ±
+davete
+daveti
+daveto
+defe
+dehou
+dek
+dekvet
+den
+deoc'h
+deomp
+deor
+derc'hel
+deus
+dez
+deze
+dezhaÃ±
+dezhe
+dezhi
+dezho
+di
+diabarzh
+diagent
+diar
+diaraok
+diavaez
+dibaoe
+dibaot
+dibar
+dic'halaÃ±
+didiac'h
+dienn
+difer
+diganeoc'h
+diganeomp
+diganeor
+diganimp
+diganin
+diganit
+digant
+digantaÃ±
+digante
+diganti
+diganto
+digemmesk
+diget
+digor
+digoret
+dija
+dije
+dimp
+din
+dinaou
+dindan
+dindanaÃ±
+dindani
+dindano
+dindanoc'h
+dindanomp
+dindanon
+dindanor
+dindanout
+dioutaÃ±
+dioute
+diouti
+diouto
+diouzh
+diouzhin
+diouzhit
+diouzhoc'h
+diouzhomp
+diouzhor
+dirak
+dirazaÃ±
+dirazi
+dirazo
+dirazoc'h
+dirazomp
+dirazon
+dirazor
+dirazout
+disheÃ±vel
+dispar
+distank
+dister
+disteraÃ±
+disterig
+distro
+dit
+divaez
+diwar
+diwezhat
+diwezhaÃ±
+do
+doa
+doare
+dont
+dost
+doue
+douetus
+douez
+doug
+draou
+draoÃ±
+dre
+drede
+dreist
+dreistaÃ±
+dreisti
+dreisto
+dreistoc'h
+dreistomp
+dreiston
+dreistor
+dreistout
+drek
+dreÃ±v
+dring
+dro
+du
+e
+e:
+eas
+ebet
+ec'h
+edo
+edoc'h
+edod
+edomp
+edon
+edont
+edos
+eer
+eeun
+efed
+egedoc'h
+egedomp
+egedon
+egedor
+egedout
+eget
+egetaÃ±
+egete
+egeti
+egeto
+eh
+eil
+eilvet
+eizh
+eizhvet
+ejoc'h
+ejod
+ejomp
+ejont
+ejout
+el
+em
+emaint
+emaoc'h
+emaomp
+emaon
+emaout
+emaÃ±
+eme
+emeur
+emezaÃ±
+emezi
+emezo
+emezoc'h
+emezomp
+emezon
+emezout
+emporzhiaÃ±
+en
+end
+endan
+endra
+enep
+ennaÃ±
+enni
+enno
+ennoc'h
+ennomp
+ennon
+ennor
+ennout
+enta
+eo
+eomp
+eont
+eor
+eot
+er
+erbet
+erfin
+esa
+esae
+espar
+estlamm
+estraÃ±j
+eta
+etre
+etreoc'h
+etrezo
+etrezoc'h
+etrezomp
+etrezor
+euh
+eur
+eus
+evel
+evelato
+eveldoc'h
+eveldomp
+eveldon
+eveldor
+eveldout
+evelkent
+eveltaÃ±
+evelte
+evelti
+evelto
+evidoc'h
+evidomp
+evidon
+evidor
+evidout
+evit
+evitaÃ±
+evite
+eviti
+evito
+ez
+eÃ±
+f:
+fac'h
+fall
+fed
+feiz
+fenn
+fezh
+fin
+finsalvet
+foei
+fouilhezaÃ±
+g:
+gallout
+ganeoc'h
+ganeomp
+ganin
+ganit
+gant
+gantaÃ±
+ganti
+ganto
+gaout
+gast
+gein
+gellout
+genndost
+gentaÃ±
+ger
+gerz
+get
+geÃ±ver
+gichen
+gin
+giz
+glan
+gloev
+goll
+gorre
+goude
+gouez
+gouezit
+gouezomp
+goulz
+gounnar
+gour
+goust
+gouze
+gouzout
+gra
+grak
+grec'h
+greiz
+grenn
+greomp
+grit
+groÃ±s
+gutez
+gwall
+gwashoc'h
+gwazh
+gwech
+gwechall
+gwechoÃ¹
+gwell
+gwezh
+gwezhall
+gwezharall
+gwezhoÃ¹
+gwig
+gwirionez
+gwitibunan
+gÃªr
+h:
+ha
+hag
+han
+hanter
+hanterc'hantad
+hanterkantved
+harz
+haÃ±
+haÃ±val
+he
+hebioÃ¹
+hec'h
+hei
+hein
+hem
+hemaÃ±
+hen
+hend
+henhont
+henn
+hennezh
+hent
+hep
+hervez
+hervezaÃ±
+hervezi
+hervezo
+hervezoc'h
+hervezomp
+hervezon
+hervezor
+hervezout
+heul
+heuliaÃ±
+hevelep
+heverk
+heÃ±vel
+heÃ±velat
+heÃ±velaÃ±
+heÃ±veliÃ±
+heÃ±veloc'h
+heÃ±velout
+hi
+hilh
+hini
+hirie
+hirio
+hiziv
+hiziviken
+ho
+hoaliÃ±
+hoc'h
+hogen
+hogos
+hogozik
+hol
+holl
+holÃ 
+homaÃ±
+hon
+honhont
+honnezh
+hont
+hop
+hopala
+hor
+hou
+houp
+hudu
+hue
+hui
+hum
+hurrah
+i
+i:
+in
+int
+is
+ispisial
+isurzhiet
+it
+ivez
+izelaÃ±
+j:
+just
+k:
+kae
+kaer
+kalon
+kalz
+kant
+kaout
+kar
+kazi
+keid
+kein
+keit
+kel
+kellies
+keloÃ¹
+kement
+ken
+kenkent
+kenkoulz
+kenment
+kent
+kentaÃ±
+kentizh
+kentoc'h
+kentre
+ker
+kerkent
+kerz
+kerzh
+ket
+keta
+keÃ±ver
+keÃ±verel
+keÃ±verius
+kichen
+kichenik
+kit
+kiz
+klak
+klek
+klik
+komprenet
+komz
+kont
+korf
+korre
+koulskoude
+koulz
+koust
+krak
+krampouezh
+krec'h
+kreiz
+kuit
+kwir
+l:
+la
+laez
+laoskel
+laouen
+lavar
+lavaret
+lavarout
+lec'h
+lein
+leizh
+lerc'h
+leun
+leuskel
+lew
+lies
+liesaÃ±
+lod
+lusk
+lÃ¢r
+lÃ¢rout
+m:
+ma
+ma'z
+mac'h
+mac'hat
+mac'haÃ±
+mac'hoc'h
+mad
+maez
+maksimal
+mann
+mar
+mard
+marg
+marzh
+mat
+maÃ±
+me
+memes
+memestra
+merkapl
+mersi
+mes
+mesk
+met
+meur
+mil
+minimal
+moan
+moaniaat
+mod
+mont
+mout
+mui
+muiaÃ±
+muioc'h
+n
+n'
+n:
+na
+nag
+naontek
+naturel
+nav
+navet
+ne
+nebeudig
+nebeut
+nebeutaÃ±
+nebeutoc'h
+neketa
+nemedoc'h
+nemedomp
+nemedon
+nemedor
+nemedout
+nemet
+nemetaÃ±
+nemete
+nemeti
+nemeto
+nemeur
+neoac'h
+nepell
+nerzh
+nes
+neseser
+netra
+neubeudoÃ¹
+neuhe
+neuze
+nevez
+newazh
+nez
+ni
+nikun
+niverus
+nul
+o
+o:
+oa
+oac'h
+oad
+oamp
+oan
+oant
+oar
+oas
+ober
+oc'h
+oc'ho
+oc'hola
+oc'hpenn
+oh
+ohe
+ollÃ©
+olole
+olÃ©
+omp
+on
+ordin
+ordinal
+ouejoc'h
+ouejod
+ouejomp
+ouejont
+ouejout
+ouek
+ouezas
+ouezi
+ouezimp
+ouezin
+ouezint
+ouezis
+ouezo
+ouezoc'h
+ouezor
+ouf
+oufe
+oufec'h
+oufed
+oufemp
+oufen
+oufent
+oufes
+ouie
+ouiec'h
+ouied
+ouiemp
+ouien
+ouient
+ouies
+ouije
+ouijec'h
+ouijed
+ouijemp
+ouijen
+ouijent
+ouijes
+out
+outaÃ±
+outi
+outo
+ouzer
+ouzh
+ouzhin
+ouzhit
+ouzhoc'h
+ouzhomp
+ouzhor
+ouzhpenn
+ouzhpennik
+ouzoc'h
+ouzomp
+ouzon
+ouzont
+ouzout
+p'
+p:
+pa
+pad
+padal
+paf
+pan
+panevedeoc'h
+panevedo
+panevedomp
+panevedon
+panevedout
+panevet
+panevetaÃ±
+paneveti
+pas
+paseet
+pe
+peadra
+peder
+pedervet
+pedervetvet
+pefe
+pegeit
+pegement
+pegen
+pegiz
+pegoulz
+pehini
+pelec'h
+pell
+pemod
+pemp
+pempved
+pemzek
+penaos
+penn
+peogwir
+peotramant
+pep
+perak
+perc'hennaÃ±
+pergen
+permetiÃ±
+peseurt
+pet
+petiaoul
+petoare
+petra
+peur
+peurgetket
+peurheÃ±vel
+peurliesaÃ±
+peurvuiaÃ±
+peus
+peustost
+peuz
+pevar
+pevare
+pevarevet
+pevarzek
+pez
+peze
+pezh
+pff
+pfft
+pfut
+picher
+pif
+pife
+pign
+pije
+pikol
+pitiaoul
+piv
+plaouf
+plok
+plouf
+po
+poa
+poelladus
+pof
+pok
+posupl
+pouah
+pourc'henn
+prest
+prestik
+prim
+prin
+provostapl
+pst
+pu
+pur
+r:
+ra
+rae
+raec'h
+raed
+raemp
+raen
+raent
+raes
+rafe
+rafec'h
+rafed
+rafemp
+rafen
+rafent
+rafes
+rag
+raimp
+raint
+raio
+raje
+rajec'h
+rajed
+rajemp
+rajen
+rajent
+rajes
+rak
+ral
+ran
+rankout
+raok
+razh
+re
+reas
+reer
+regennoÃ¹
+reiÃ±
+rejoc'h
+rejod
+rejomp
+rejont
+rejout
+rener
+rentaÃ±
+reoc'h
+reomp
+reont
+reor
+reot
+resis
+ret
+reve
+rez
+ri
+rik
+rin
+ris
+rit
+rouez
+s:
+sac'h
+sant
+sav
+saÃ±set
+se
+sed
+seitek
+seizh
+seizhvet
+sell
+sellit
+ser
+setu
+seul
+seurt
+siwazh
+skignaÃ±
+skoaz
+skouer
+sort
+souden
+souvitaÃ±
+soÃ±j
+speriaÃ±
+spririÃ±
+stad
+stlabezaÃ±
+stop
+stranaÃ±
+strewiÃ±
+strishaat
+stumm
+sujed
+surtoud
+t:
+ta
+taer
+tailh
+tak
+tal
+talvoudegezh
+tamm
+tanav
+taol
+te
+techet
+teir
+teirvet
+telt
+teltenn
+teus
+teut
+teuteu
+ti
+tik
+toa
+tok
+tost
+tostig
+toud
+touesk
+touez
+toull
+tra
+trantenn
+traoÃ±
+trawalc'h
+tre
+trede
+tregont
+tremenet
+tri
+trivet
+triwec'h
+trizek
+tro
+trugarez
+trumm
+tsoin
+tsouin
+tu
+tud
+u:
+ugent
+uhel
+uhelaÃ±
+ul
+un
+unan
+unanez
+unanig
+unnek
+unnekvet
+ur
+urzh
+us
+v:
+va
+vale
+van
+vare
+vat
+vefe
+vefec'h
+vefed
+vefemp
+vefen
+vefent
+vefes
+vesk
+vete
+vez
+vezan
+vezaÃ±
+veze
+vezec'h
+vezed
+vezemp
+vezen
+vezent
+vezer
+vezes
+vezez
+vezit
+vezomp
+vezont
+vi
+vihan
+vihanaÃ±
+vije
+vijec'h
+vijed
+vijemp
+vijen
+vijent
+vijes
+viken
+vimp
+vin
+vint
+vior
+viot
+virviken
+viskoazh
+vlan
+vlaou
+vo
+vod
+voe
+voec'h
+voed
+voemp
+voen
+voent
+voes
+vont
+vostapl
+vrac'h
+vrasaÃ±
+vremaÃ±
+w:
+walc'h
+war
+warnaÃ±
+warni
+warno
+warnoc'h
+warnomp
+warnon
+warnor
+warnout
+wazh
+wech
+wechoÃ¹
+well
+y:
+you
+youadenn
+youc'hadenn
+youc'hou
+z:
+za
+zan
+zaw
+zeu
+zi
+ziar
+zigarez
+ziget
+zindan
+zioc'h
+ziouzh
+zirak
+zivout
+ziwar
+ziwezhaÃ±
+zo
+zoken
+zokenoc'h
+zouesk
+zouez
+zro
+zu
\ No newline at end of file
diff --git a/static/stopwords/ca b/static/stopwords/ca
new file mode 100644
index 0000000..cdba332
--- /dev/null
+++ b/static/stopwords/ca
@@ -0,0 +1,278 @@
+a
+abans
+acÃ­
+ah
+aixÃ­
+aixÃ²
+al
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ 
+allÃ­
+allÃ²
+als
+altra
+altre
+altres
+amb
+ambdues
+ambdÃ³s
+anar
+ans
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ­
+baix
+bastant
+bÃ©
+cada
+cadascuna
+cadascunes
+cadascuns
+cadascÃº
+com
+consegueixo
+conseguim
+conseguir
+consigueix
+consigueixen
+consigueixes
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+des de
+desprÃ©s
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+elles
+ells
+els
+em
+en
+encara
+ens
+entre
+era
+erem
+eren
+eres
+es
+esta
+estan
+estat
+estava
+estaven
+estem
+esteu
+estic
+estÃ 
+estÃ vem
+estÃ veu
+et
+etc
+ets
+fa
+faig
+fan
+fas
+fem
+fer
+feu
+fi
+fins
+fora
+gairebÃ©
+ha
+han
+has
+haver
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+inclÃ²s
+ja
+jo
+l'hi
+la
+les
+li
+li'n
+llarg
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+meu
+meus
+meva
+meves
+mode
+molt
+molta
+moltes
+molts
+mon
+mons
+mÃ©s
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+nomÃ©s
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+per que
+perquÃ¨
+perÃ²
+poc
+poca
+pocs
+podem
+poden
+poder
+podeu
+poques
+potser
+primer
+propi
+puc
+qual
+quals
+quan
+quant
+que
+quelcom
+qui
+quin
+quina
+quines
+quins
+quÃ¨
+s'ha
+s'han
+sa
+sabem
+saben
+saber
+sabeu
+sap
+saps
+semblant
+semblants
+sense
+ser
+ses
+seu
+seus
+seva
+seves
+si
+sobre
+sobretot
+soc
+solament
+sols
+som
+son
+sons
+sota
+sou
+sÃ³c
+sÃ³n
+t'ha
+t'han
+t'he
+ta
+tal
+tambÃ©
+tampoc
+tan
+tant
+tanta
+tantes
+te
+tene
+tenim
+tenir
+teniu
+teu
+teus
+teva
+teves
+tinc
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
+Ã©rem
+Ã©reu
+Ã©s
+Ã©ssent
+Ãºltim
+Ãºs
\ No newline at end of file
diff --git a/static/stopwords/cs b/static/stopwords/cs
new file mode 100644
index 0000000..5c6f559
--- /dev/null
+++ b/static/stopwords/cs
@@ -0,0 +1,423 @@
+a
+aby
+ahoj
+aj
+ale
+anebo
+ani
+aniÅ¾
+ano
+asi
+aspoÅ
+atd
+atp
+az
+aÄkoli
+aÅ¾
+bez
+beze
+blÃ­zko
+bohuÅ¾el
+brzo
+bude
+budem
+budeme
+budes
+budete
+budeÅ¡
+budou
+budu
+by
+byl
+byla
+byli
+bylo
+byly
+bys
+byt
+bÃ½t
+bÄhem
+chce
+chceme
+chcete
+chceÅ¡
+chci
+chtÃ­t
+chtÄjÃ­
+chut'
+chuti
+ci
+clanek
+clanku
+clanky
+co
+coz
+coÅ¾
+cz
+daleko
+dalsi
+dalÅ¡Ã­
+den
+deset
+design
+devatenÃ¡ct
+devÄt
+dnes
+do
+dobrÃ½
+docela
+dva
+dvacet
+dvanÃ¡ct
+dvÄ
+dÃ¡l
+dÃ¡le
+dÄkovat
+dÄkujeme
+dÄkuji
+email
+ho
+hodnÄ
+i
+jak
+jakmile
+jako
+jakoÅ¾
+jde
+je
+jeden
+jedenÃ¡ct
+jedna
+jedno
+jednou
+jedou
+jeho
+jehoÅ¾
+jej
+jeji
+jejich
+jejÃ­
+jelikoÅ¾
+jemu
+jen
+jenom
+jenÅ¾
+jeste
+jestli
+jestliÅ¾e
+jeÅ¡tÄ
+jeÅ¾
+ji
+jich
+jimi
+jinak
+jine
+jinÃ©
+jiz
+jiÅ¾
+jsem
+jses
+jseÅ¡
+jsi
+jsme
+jsou
+jste
+jÃ¡
+jÃ­
+jÃ­m
+jÃ­Å¾
+jÅ¡te
+k
+kam
+kaÅ¾dÃ½
+kde
+kdo
+kdy
+kdyz
+kdyÅ¾
+ke
+kolik
+kromÄ
+ktera
+ktere
+kteri
+kterou
+ktery
+kterÃ¡
+kterÃ©
+kterÃ½
+kteÅi
+kteÅÃ­
+ku
+kvÅ¯li
+ma
+majÃ­
+mate
+me
+mezi
+mi
+mit
+mne
+mnou
+mnÄ
+moc
+mohl
+mohou
+moje
+moji
+moÅ¾nÃ¡
+muj
+musÃ­
+muze
+my
+mÃ¡
+mÃ¡lo
+mÃ¡m
+mÃ¡me
+mÃ¡te
+mÃ¡Å¡
+mÃ©
+mÃ­
+mÃ­t
+mÄ
+mÅ¯j
+mÅ¯Å¾e
+na
+nad
+nade
+nam
+napiste
+napiÅ¡te
+naproti
+nas
+nasi
+naÄeÅ¾
+naÅ¡e
+naÅ¡i
+ne
+nebo
+nebyl
+nebyla
+nebyli
+nebyly
+nechÅ¥
+nedÄlajÃ­
+nedÄlÃ¡
+nedÄlÃ¡m
+nedÄlÃ¡me
+nedÄlÃ¡te
+nedÄlÃ¡Å¡
+neg
+nejsi
+nejsou
+nemajÃ­
+nemÃ¡me
+nemÃ¡te
+nemÄl
+neni
+nenÃ­
+nestaÄÃ­
+nevadÃ­
+nez
+neÅ¾
+nic
+nich
+nimi
+nove
+novy
+novÃ©
+novÃ½
+nula
+nÃ¡
+nÃ¡m
+nÃ¡mi
+nÃ¡s
+nÃ¡Å¡
+nÃ­
+nÃ­m
+nÄ
+nÄco
+nÄjak
+nÄkde
+nÄkdo
+nÄmu
+nÄmuÅ¾
+o
+od
+ode
+on
+ona
+oni
+ono
+ony
+osm
+osmnÃ¡ct
+pak
+patnÃ¡ct
+po
+pod
+podle
+pokud
+potom
+pouze
+pozdÄ
+poÅÃ¡d
+prave
+pravÃ©
+pred
+pres
+pri
+pro
+proc
+prostÄ
+prosÃ­m
+proti
+proto
+protoze
+protoÅ¾e
+proÄ
+prvni
+prvnÃ­
+prÃ¡ve
+pta
+pÄt
+pÅed
+pÅede
+pÅes
+pÅese
+pÅi
+pÅiÄemÅ¾
+re
+rovnÄ
+s
+se
+sedm
+sedmnÃ¡ct
+si
+sice
+skoro
+smÃ­
+smÄjÃ­
+snad
+spolu
+sta
+sto
+strana
+stÃ©
+sve
+svych
+svym
+svymi
+svÃ©
+svÃ½ch
+svÃ½m
+svÃ½mi
+svÅ¯j
+ta
+tady
+tak
+take
+takhle
+taky
+takze
+takÃ©
+takÅ¾e
+tam
+tamhle
+tamhleto
+tamto
+tato
+te
+tebe
+tebou
+ted'
+tedy
+tema
+ten
+tento
+teto
+ti
+tim
+timto
+tipy
+tisÃ­c
+tisÃ­ce
+to
+tobÄ
+tohle
+toho
+tohoto
+tom
+tomto
+tomu
+tomuto
+toto
+troÅ¡ku
+tu
+tuto
+tvoje
+tvÃ¡
+tvÃ©
+tvÅ¯j
+ty
+tyto
+tÃ©ma
+tÃ©to
+tÃ­m
+tÃ­mto
+tÄ
+tÄm
+tÄma
+tÄmu
+tÅeba
+tÅi
+tÅinÃ¡ct
+u
+urÄitÄ
+uz
+uÅ¾
+v
+vam
+vas
+vase
+vaÅ¡e
+vaÅ¡i
+ve
+vedle
+veÄer
+vice
+vlastnÄ
+vsak
+vy
+vÃ¡m
+vÃ¡mi
+vÃ¡s
+vÃ¡Å¡
+vÃ­ce
+vÅ¡ak
+vÅ¡echen
+vÅ¡echno
+vÅ¡ichni
+vÅ¯bec
+vÅ¾dy
+z
+za
+zatÃ­mco
+zaÄ
+zda
+zde
+ze
+zpet
+zpravy
+zprÃ¡vy
+zpÄt
+Äau
+Äi
+ÄlÃ¡nek
+ÄlÃ¡nku
+ÄlÃ¡nky
+ÄtrnÃ¡ct
+ÄtyÅi
+Å¡est
+Å¡estnÃ¡ct
+Å¾e
\ No newline at end of file
diff --git a/static/stopwords/da b/static/stopwords/da
new file mode 100644
index 0000000..3d441a5
--- /dev/null
+++ b/static/stopwords/da
@@ -0,0 +1,170 @@
+ad
+af
+aldrig
+alle
+alt
+anden
+andet
+andre
+at
+bare
+begge
+blev
+blive
+bliver
+da
+de
+dem
+den
+denne
+der
+deres
+det
+dette
+dig
+din
+dine
+disse
+dit
+dog
+du
+efter
+ej
+eller
+en
+end
+ene
+eneste
+enhver
+er
+et
+far
+fem
+fik
+fire
+flere
+fleste
+for
+fordi
+forrige
+fra
+fÃ¥
+fÃ¥r
+fÃ¸r
+god
+godt
+ham
+han
+hans
+har
+havde
+have
+hej
+helt
+hende
+hendes
+her
+hos
+hun
+hvad
+hvem
+hver
+hvilken
+hvis
+hvor
+hvordan
+hvorfor
+hvornÃ¥r
+i
+ikke
+ind
+ingen
+intet
+ja
+jeg
+jer
+jeres
+jo
+kan
+kom
+komme
+kommer
+kun
+kunne
+lad
+lav
+lidt
+lige
+lille
+man
+mand
+mange
+med
+meget
+men
+mens
+mere
+mig
+min
+mine
+mit
+mod
+mÃ¥
+ned
+nej
+ni
+nogen
+noget
+nogle
+nu
+ny
+nyt
+nÃ¥r
+nÃ¦r
+nÃ¦ste
+nÃ¦sten
+og
+ogsÃ¥
+okay
+om
+op
+os
+otte
+over
+pÃ¥
+se
+seks
+selv
+ser
+ses
+sig
+sige
+sin
+sine
+sit
+skal
+skulle
+som
+stor
+store
+syv
+sÃ¥
+sÃ¥dan
+tag
+tage
+thi
+ti
+til
+to
+tre
+ud
+under
+var
+ved
+vi
+vil
+ville
+vor
+vores
+vÃ¦re
+vÃ¦ret
\ No newline at end of file
diff --git a/static/stopwords/de b/static/stopwords/de
new file mode 100644
index 0000000..b3c8c66
--- /dev/null
+++ b/static/stopwords/de
@@ -0,0 +1,620 @@
+a
+ab
+aber
+ach
+acht
+achte
+achten
+achter
+achtes
+ag
+alle
+allein
+allem
+allen
+aller
+allerdings
+alles
+allgemeinen
+als
+also
+am
+an
+ander
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+au
+auch
+auf
+aus
+ausser
+ausserdem
+auÃer
+auÃerdem
+b
+bald
+bei
+beide
+beiden
+beim
+beispiel
+bekannt
+bereits
+besonders
+besser
+besten
+bin
+bis
+bisher
+bist
+c
+d
+d.h
+da
+dabei
+dadurch
+dafÃ¼r
+dagegen
+daher
+dahin
+dahinter
+damals
+damit
+danach
+daneben
+dank
+dann
+daran
+darauf
+daraus
+darf
+darfst
+darin
+darum
+darunter
+darÃ¼ber
+das
+dasein
+daselbst
+dass
+dasselbe
+davon
+davor
+dazu
+dazwischen
+daÃ
+dein
+deine
+deinem
+deinen
+deiner
+deines
+dem
+dementsprechend
+demgegenÃ¼ber
+demgemÃ¤ss
+demgemÃ¤Ã
+demselben
+demzufolge
+den
+denen
+denn
+denselben
+der
+deren
+derer
+derjenige
+derjenigen
+dermassen
+dermaÃen
+derselbe
+derselben
+des
+deshalb
+desselben
+dessen
+deswegen
+dich
+die
+diejenige
+diejenigen
+dies
+diese
+dieselbe
+dieselben
+diesem
+diesen
+dieser
+dieses
+dir
+doch
+dort
+drei
+drin
+dritte
+dritten
+dritter
+drittes
+du
+durch
+durchaus
+durfte
+durften
+dÃ¼rfen
+dÃ¼rft
+e
+eben
+ebenso
+ehrlich
+ei
+ei,
+eigen
+eigene
+eigenen
+eigener
+eigenes
+ein
+einander
+eine
+einem
+einen
+einer
+eines
+einig
+einige
+einigem
+einigen
+einiger
+einiges
+einmal
+eins
+elf
+en
+ende
+endlich
+entweder
+er
+ernst
+erst
+erste
+ersten
+erster
+erstes
+es
+etwa
+etwas
+euch
+euer
+eure
+eurem
+euren
+eurer
+eures
+f
+folgende
+frÃ¼her
+fÃ¼nf
+fÃ¼nfte
+fÃ¼nften
+fÃ¼nfter
+fÃ¼nftes
+fÃ¼r
+g
+gab
+ganz
+ganze
+ganzen
+ganzer
+ganzes
+gar
+gedurft
+gegen
+gegenÃ¼ber
+gehabt
+gehen
+geht
+gekannt
+gekonnt
+gemacht
+gemocht
+gemusst
+genug
+gerade
+gern
+gesagt
+geschweige
+gewesen
+gewollt
+geworden
+gibt
+ging
+gleich
+gott
+gross
+grosse
+grossen
+grosser
+grosses
+groÃ
+groÃe
+groÃen
+groÃer
+groÃes
+gut
+gute
+guter
+gutes
+h
+hab
+habe
+haben
+habt
+hast
+hat
+hatte
+hatten
+hattest
+hattet
+heisst
+her
+heute
+hier
+hin
+hinter
+hoch
+hÃ¤tte
+hÃ¤tten
+i
+ich
+ihm
+ihn
+ihnen
+ihr
+ihre
+ihrem
+ihren
+ihrer
+ihres
+im
+immer
+in
+indem
+infolgedessen
+ins
+irgend
+ist
+j
+ja
+jahr
+jahre
+jahren
+je
+jede
+jedem
+jeden
+jeder
+jedermann
+jedermanns
+jedes
+jedoch
+jemand
+jemandem
+jemanden
+jene
+jenem
+jenen
+jener
+jenes
+jetzt
+k
+kam
+kann
+kannst
+kaum
+kein
+keine
+keinem
+keinen
+keiner
+keines
+kleine
+kleinen
+kleiner
+kleines
+kommen
+kommt
+konnte
+konnten
+kurz
+kÃ¶nnen
+kÃ¶nnt
+kÃ¶nnte
+l
+lang
+lange
+leicht
+leide
+lieber
+los
+m
+machen
+macht
+machte
+mag
+magst
+mahn
+mal
+man
+manche
+manchem
+manchen
+mancher
+manches
+mann
+mehr
+mein
+meine
+meinem
+meinen
+meiner
+meines
+mensch
+menschen
+mich
+mir
+mit
+mittel
+mochte
+mochten
+morgen
+muss
+musst
+musste
+mussten
+muÃ
+muÃt
+mÃ¶chte
+mÃ¶gen
+mÃ¶glich
+mÃ¶gt
+mÃ¼ssen
+mÃ¼sst
+mÃ¼Ãt
+n
+na
+nach
+nachdem
+nahm
+natÃ¼rlich
+neben
+nein
+neue
+neuen
+neun
+neunte
+neunten
+neunter
+neuntes
+nicht
+nichts
+nie
+niemand
+niemandem
+niemanden
+noch
+nun
+nur
+o
+ob
+oben
+oder
+offen
+oft
+ohne
+ordnung
+p
+q
+r
+recht
+rechte
+rechten
+rechter
+rechtes
+richtig
+rund
+s
+sa
+sache
+sagt
+sagte
+sah
+satt
+schlecht
+schluss
+schon
+sechs
+sechste
+sechsten
+sechster
+sechstes
+sehr
+sei
+seid
+seien
+sein
+seine
+seinem
+seinen
+seiner
+seines
+seit
+seitdem
+selbst
+sich
+sie
+sieben
+siebente
+siebenten
+siebenter
+siebentes
+sind
+so
+solang
+solche
+solchem
+solchen
+solcher
+solches
+soll
+sollen
+sollst
+sollt
+sollte
+sollten
+sondern
+sonst
+soweit
+sowie
+spÃ¤ter
+startseite
+statt
+steht
+suche
+t
+tag
+tage
+tagen
+tat
+teil
+tel
+tritt
+trotzdem
+tun
+u
+uhr
+um
+und
+uns
+unse
+unsem
+unsen
+unser
+unsere
+unserer
+unses
+unter
+v
+vergangenen
+viel
+viele
+vielem
+vielen
+vielleicht
+vier
+vierte
+vierten
+vierter
+viertes
+vom
+von
+vor
+w
+wahr
+wann
+war
+waren
+warst
+wart
+warum
+was
+weg
+wegen
+weil
+weit
+weiter
+weitere
+weiteren
+weiteres
+welche
+welchem
+welchen
+welcher
+welches
+wem
+wen
+wenig
+wenige
+weniger
+weniges
+wenigstens
+wenn
+wer
+werde
+werden
+werdet
+weshalb
+wessen
+wie
+wieder
+wieso
+will
+willst
+wir
+wird
+wirklich
+wirst
+wissen
+wo
+woher
+wohin
+wohl
+wollen
+wollt
+wollte
+wollten
+worden
+wurde
+wurden
+wÃ¤hrend
+wÃ¤hrenddem
+wÃ¤hrenddessen
+wÃ¤re
+wÃ¼rde
+wÃ¼rden
+x
+y
+z
+z.b
+zehn
+zehnte
+zehnten
+zehnter
+zehntes
+zeit
+zu
+zuerst
+zugleich
+zum
+zunÃ¤chst
+zur
+zurÃ¼ck
+zusammen
+zwanzig
+zwar
+zwei
+zweite
+zweiten
+zweiter
+zweites
+zwischen
+zwÃ¶lf
+Ã¼ber
+Ã¼berhaupt
+Ã¼brigens
\ No newline at end of file
diff --git a/static/stopwords/el b/static/stopwords/el
new file mode 100644
index 0000000..25deffb
--- /dev/null
+++ b/static/stopwords/el
@@ -0,0 +1,847 @@
+Î­Î½Î±
+Î­Î½Î±Î½
+Î­Î½Î±Ï
+Î±Î¹
+Î±ÎºÎ¿Î¼Î±
+Î±ÎºÎ¿Î¼Î·
+Î±ÎºÏÎ¹Î²ÏÏ
+Î±Î»Î·Î¸ÎµÎ¹Î±
+Î±Î»Î·Î¸Î¹Î½Î±
+Î±Î»Î»Î±
+Î±Î»Î»Î±ÏÎ¿Ï
+Î±Î»Î»ÎµÏ
+Î±Î»Î»Î·
+Î±Î»Î»Î·Î½
+Î±Î»Î»Î·Ï
+Î±Î»Î»Î¹ÏÏ
+Î±Î»Î»Î¹ÏÏÎ¹ÎºÎ±
+Î±Î»Î»Î¿
+Î±Î»Î»Î¿Î¹
+Î±Î»Î»Î¿Î¹ÏÏ
+Î±Î»Î»Î¿Î¹ÏÏÎ¹ÎºÎ±
+Î±Î»Î»Î¿Î½
+Î±Î»Î»Î¿Ï
+Î±Î»Î»Î¿ÏÎµ
+Î±Î»Î»Î¿Ï
+Î±Î»Î»Î¿ÏÏ
+Î±Î»Î»ÏÎ½
+Î±Î¼Î±
+Î±Î¼ÎµÏÎ±
+Î±Î¼ÎµÏÏÏ
+Î±Î½
+Î±Î½Î±
+Î±Î½Î±Î¼ÎµÏÎ±
+Î±Î½Î±Î¼ÎµÏÎ±Î¾Ï
+Î±Î½ÎµÏ
+Î±Î½ÏÎ¹
+Î±Î½ÏÎ¹ÏÎµÏÎ±
+Î±Î½ÏÎ¹Ï
+Î±Î½Ï
+Î±Î½ÏÏÎµÏÏ
+Î±Î¾Î±ÏÎ½Î±
+Î±Ï
+Î±ÏÎµÎ½Î±Î½ÏÎ¹
+Î±ÏÎ¿
+Î±ÏÎ¿ÏÎµ
+Î±ÏÏ
+Î±ÏÎ±
+Î±ÏÎ±Î³Îµ
+Î±ÏÎ³Î±
+Î±ÏÎ³Î¿ÏÎµÏÎ¿
+Î±ÏÎ¹ÏÏÎµÏÎ±
+Î±ÏÎºÎµÏÎ±
+Î±ÏÏÎ¹ÎºÎ±
+Î±Ï
+Î±ÏÏÎ¹Î¿
+Î±ÏÏÎ±
+Î±ÏÏÎµÏ
+Î±ÏÏÎµÏ
+Î±ÏÏÎ·
+Î±ÏÏÎ·Î½
+Î±ÏÏÎ·Ï
+Î±ÏÏÎ¿
+Î±ÏÏÎ¿Î¹
+Î±ÏÏÎ¿Î½
+Î±ÏÏÎ¿Ï
+Î±ÏÏÎ¿Ï
+Î±ÏÏÎ¿Ï
+Î±ÏÏÎ¿ÏÏ
+Î±ÏÏÎ¿ÏÏ
+Î±ÏÏÏÎ½
+Î±ÏÎ¿ÏÎ¿Ï
+Î±ÏÎ¿Ï
+Î±á¼±
+Î±á¼³
+Î±á¼µ
+Î±á½ÏÏÏ
+Î±á½Ïá½¸Ï
+Î±á½
+Î±âÎ¹Î±ÎºÎ¿ÏÎ±
+Î²ÎµÎ²Î±Î¹Î±
+Î²ÎµÎ²Î±Î¹Î¿ÏÎ±ÏÎ±
+Î³Î¬Ï
+Î³Î±
+Î³Î±^
+Î³Îµ
+Î³Î¹
+Î³Î¹Î±
+Î³Î¿á¿¦Î½
+Î³ÏÎ·Î³Î¿ÏÎ±
+Î³ÏÏÏ
+Î³á½°Ï
+Î´'
+Î´Î­
+Î´Î®
+Î´Î±Î¯
+Î´Î±Î¯Ï
+Î´Î±á½¶
+Î´Î±á½¶Ï
+Î´Îµ
+Î´ÎµÎ½
+Î´Î¹
+Î´Î¹'
+Î´Î¹Î¬
+Î´Î¹Î±
+Î´Î¹á½°
+Î´á½²
+Î´á½´
+Î´â
+ÎµÎ±Î½
+ÎµÎ±ÏÏÎ¿
+ÎµÎ±ÏÏÎ¿Î½
+ÎµÎ±ÏÏÎ¿Ï
+ÎµÎ±ÏÏÎ¿ÏÏ
+ÎµÎ±ÏÏÏÎ½
+ÎµÎ³ÎºÎ±Î¹ÏÎ±
+ÎµÎ³ÎºÎ±Î¹ÏÏÏ
+ÎµÎ³Ï
+ÎµÎ¹Î¸Îµ
+ÎµÎ¹Î¼Î±Î¹
+ÎµÎ¹Î¼Î±ÏÏÎµ
+ÎµÎ¹Î½Î±Î¹
+ÎµÎ¹Ï
+ÎµÎ¹ÏÎ±Î¹
+ÎµÎ¹ÏÎ±ÏÏÎµ
+ÎµÎ¹ÏÏÎµ
+ÎµÎ¹ÏÎµ
+ÎµÎ¹ÏÎ±
+ÎµÎ¹ÏÎ±Î¼Îµ
+ÎµÎ¹ÏÎ±Î½
+ÎµÎ¹ÏÎ±ÏÎµ
+ÎµÎ¹ÏÎµ
+ÎµÎ¹ÏÎµÏ
+ÎµÎ¹âÎµÎ¼Î·
+ÎµÎº
+ÎµÎºÎ±ÏÏÎ±
+ÎµÎºÎ±ÏÏÎµÏ
+ÎµÎºÎ±ÏÏÎ·
+ÎµÎºÎ±ÏÏÎ·Î½
+ÎµÎºÎ±ÏÏÎ·Ï
+ÎµÎºÎ±ÏÏÎ¿
+ÎµÎºÎ±ÏÏÎ¿Î¹
+ÎµÎºÎ±ÏÏÎ¿Î½
+ÎµÎºÎ±ÏÏÎ¿Ï
+ÎµÎºÎ±ÏÏÎ¿Ï
+ÎµÎºÎ±ÏÏÎ¿ÏÏ
+ÎµÎºÎ±ÏÏÏÎ½
+ÎµÎºÎµÎ¹
+ÎµÎºÎµÎ¹Î½Î±
+ÎµÎºÎµÎ¹Î½ÎµÏ
+ÎµÎºÎµÎ¹Î½ÎµÏ
+ÎµÎºÎµÎ¹Î½Î·
+ÎµÎºÎµÎ¹Î½Î·Î½
+ÎµÎºÎµÎ¹Î½Î·Ï
+ÎµÎºÎµÎ¹Î½Î¿
+ÎµÎºÎµÎ¹Î½Î¿Î¹
+ÎµÎºÎµÎ¹Î½Î¿Î½
+ÎµÎºÎµÎ¹Î½Î¿Ï
+ÎµÎºÎµÎ¹Î½Î¿Ï
+ÎµÎºÎµÎ¹Î½Î¿Ï
+ÎµÎºÎµÎ¹Î½Î¿ÏÏ
+ÎµÎºÎµÎ¹Î½Î¿ÏÏ
+ÎµÎºÎµÎ¹Î½ÏÎ½
+ÎµÎºÏÎ¿Ï
+ÎµÎ¼Î±Ï
+ÎµÎ¼ÎµÎ¹Ï
+ÎµÎ¼ÎµÎ½Î±
+ÎµÎ¼ÏÏÎ¿Ï
+ÎµÎ½
+ÎµÎ½Î±
+ÎµÎ½Î±Î½
+ÎµÎ½Î±Ï
+ÎµÎ½Î¿Ï
+ÎµÎ½ÏÎµÎ»ÏÏ
+ÎµÎ½ÏÎ¿Ï
+ÎµÎ½ÏÏÎ¼ÎµÏÎ±Î¾Ï
+ÎµÎ½Ï
+ÎµÎ½ÏÏ
+ÎµÎ¾
+ÎµÎ¾Î±ÏÎ½Î±
+ÎµÎ¾Î·Ï
+ÎµÎ¾Î¹ÏÎ¿Ï
+ÎµÎ¾Ï
+ÎµÏ
+ÎµÏÎ¯
+ÎµÏÎ±Î½Ï
+ÎµÏÎµÎ¹ÏÎ±
+ÎµÏÎµÎ¹âÎ·
+ÎµÏÎ¹
+ÎµÏÎ¹ÏÎ·Ï
+ÎµÏÎ¿Î¼ÎµÎ½ÏÏ
+ÎµÏÎ±Ï
+ÎµÏÎµÎ¹Ï
+ÎµÏÎµÎ½Î±
+ÎµÏÏÏ
+ÎµÏÏ
+ÎµÏÎµÏÎ±
+ÎµÏÎµÏÎ±Î¹
+ÎµÏÎµÏÎ±Ï
+ÎµÏÎµÏÎµÏ
+ÎµÏÎµÏÎ·
+ÎµÏÎµÏÎ·Ï
+ÎµÏÎµÏÎ¿
+ÎµÏÎµÏÎ¿Î¹
+ÎµÏÎµÏÎ¿Î½
+ÎµÏÎµÏÎ¿Ï
+ÎµÏÎµÏÎ¿Ï
+ÎµÏÎµÏÎ¿ÏÏ
+ÎµÏÎµÏÏÎ½
+ÎµÏÎ¿ÏÏÎ±
+ÎµÏÎ¿ÏÏÎµÏ
+ÎµÏÎ¿ÏÏÎ·
+ÎµÏÎ¿ÏÏÎ·Î½
+ÎµÏÎ¿ÏÏÎ·Ï
+ÎµÏÎ¿ÏÏÎ¿
+ÎµÏÎ¿ÏÏÎ¿Î¹
+ÎµÏÎ¿ÏÏÎ¿Î½
+ÎµÏÎ¿ÏÏÎ¿Ï
+ÎµÏÎ¿ÏÏÎ¿Ï
+ÎµÏÎ¿ÏÏÎ¿ÏÏ
+ÎµÏÎ¿ÏÏÏÎ½
+ÎµÏÏÎ¹
+ÎµÏÎ³Îµ
+ÎµÏÎ¸ÏÏ
+ÎµÏÏÏÏÏÏ
+ÎµÏÎµÎ¾Î·Ï
+ÎµÏÎµÎ¹
+ÎµÏÎµÎ¹Ï
+ÎµÏÎµÏÎµ
+ÎµÏÎ¸ÎµÏ
+ÎµÏÎ¿Î¼Îµ
+ÎµÏÎ¿ÏÎ¼Îµ
+ÎµÏÎ¿ÏÎ½
+ÎµÏÏÎµÏ
+ÎµÏÏ
+ÎµÏÏ
+Îµá¼°
+Îµá¼°Î¼Î¯
+Îµá¼°Î¼á½¶
+Îµá¼°Ï
+Îµá¼°Ï
+Îµá¼´
+Îµá¼´Î¼Î¹
+Îµá¼´ÏÎµ
+ÎµâÏ
+Î·
+Î·Î¼Î±ÏÏÎ±Î½
+Î·Î¼Î±ÏÏÎµ
+Î·Î¼Î¿ÏÎ½
+Î·ÏÎ±ÏÏÎ±Î½
+Î·ÏÎ±ÏÏÎµ
+Î·ÏÎ¿ÏÎ½
+Î·ÏÎ±Î½
+Î·ÏÎ±Î½Îµ
+Î·ÏÎ¿Î¹
+Î·ÏÏÎ¿Î½
+Î·âÎ·
+Î¸Î±
+Î¹
+Î¹Î¹
+Î¹Î¹Î¹
+Î¹ÏÎ±Î¼Îµ
+Î¹ÏÎ¹Î±
+Î¹ÏÏÏ
+Î¹ÏÏÏ
+Î¹âÎ¹Î±
+Î¹âÎ¹Î±Î½
+Î¹âÎ¹Î±Ï
+Î¹âÎ¹ÎµÏ
+Î¹âÎ¹Î¿
+Î¹âÎ¹Î¿Î¹
+Î¹âÎ¹Î¿Î½
+Î¹âÎ¹Î¿Ï
+Î¹âÎ¹Î¿Ï
+Î¹âÎ¹Î¿ÏÏ
+Î¹âÎ¹ÏÎ½
+Î¹âÎ¹ÏÏ
+Îº
+ÎºÎ±Î¯
+ÎºÎ±Î¯ÏÎ¿Î¹
+ÎºÎ±Î¸
+ÎºÎ±Î¸Îµ
+ÎºÎ±Î¸ÎµÎ¼Î¹Î±
+ÎºÎ±Î¸ÎµÎ¼Î¹Î±Ï
+ÎºÎ±Î¸ÎµÎ½Î±
+ÎºÎ±Î¸ÎµÎ½Î±Ï
+ÎºÎ±Î¸ÎµÎ½Î¿Ï
+ÎºÎ±Î¸ÎµÏÎ¹
+ÎºÎ±Î¸Î¿Î»Î¿Ï
+ÎºÎ±Î¸ÏÏ
+ÎºÎ±Î¹
+ÎºÎ±ÎºÎ±
+ÎºÎ±ÎºÏÏ
+ÎºÎ±Î»Î±
+ÎºÎ±Î»ÏÏ
+ÎºÎ±Î¼Î¹Î±
+ÎºÎ±Î¼Î¹Î±Î½
+ÎºÎ±Î¼Î¹Î±Ï
+ÎºÎ±Î¼ÏÎ¿ÏÎ±
+ÎºÎ±Î¼ÏÎ¿ÏÎµÏ
+ÎºÎ±Î¼ÏÎ¿ÏÎ·
+ÎºÎ±Î¼ÏÎ¿ÏÎ·Î½
+ÎºÎ±Î¼ÏÎ¿ÏÎ·Ï
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿Î¹
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿Î½
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿Ï
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿Ï
+ÎºÎ±Î¼ÏÎ¿ÏÎ¿ÏÏ
+ÎºÎ±Î¼ÏÎ¿ÏÏÎ½
+ÎºÎ±Î½ÎµÎ¹Ï
+ÎºÎ±Î½ÎµÎ½
+ÎºÎ±Î½ÎµÎ½Î±
+ÎºÎ±Î½ÎµÎ½Î±Î½
+ÎºÎ±Î½ÎµÎ½Î±Ï
+ÎºÎ±Î½ÎµÎ½Î¿Ï
+ÎºÎ±ÏÎ¿Î¹Î±
+ÎºÎ±ÏÎ¿Î¹Î±Î½
+ÎºÎ±ÏÎ¿Î¹Î±Ï
+ÎºÎ±ÏÎ¿Î¹ÎµÏ
+ÎºÎ±ÏÎ¿Î¹Î¿
+ÎºÎ±ÏÎ¿Î¹Î¿Î¹
+ÎºÎ±ÏÎ¿Î¹Î¿Î½
+ÎºÎ±ÏÎ¿Î¹Î¿Ï
+ÎºÎ±ÏÎ¿Î¹Î¿Ï
+ÎºÎ±ÏÎ¿Î¹Î¿ÏÏ
+ÎºÎ±ÏÎ¿Î¹ÏÎ½
+ÎºÎ±ÏÎ¿ÏÎµ
+ÎºÎ±ÏÎ¿Ï
+ÎºÎ±ÏÏÏ
+ÎºÎ±Ï
+ÎºÎ±ÏÎ¬
+ÎºÎ±ÏÎ±
+ÎºÎ±ÏÎ¹
+ÎºÎ±ÏÎ¹ÏÎ¹
+ÎºÎ±ÏÎ¿ÏÎ¹Î½
+ÎºÎ±ÏÏ
+ÎºÎ±Ïá½°
+ÎºÎ±á½¶
+ÎºÎ¹
+ÎºÎ¹Î¿Î»Î±Ï
+ÎºÎ»Ï
+ÎºÎ¿Î½ÏÎ±
+ÎºÏÎ»
+ÎºÏÏÎ¹ÏÏ
+Îºá¼Î½
+Îºá¼Î½
+Î»Î¹Î³Î±ÎºÎ¹
+Î»Î¹Î³Î¿
+Î»Î¹Î³ÏÏÎµÏÎ¿
+Î»Î¿Î³Ï
+Î»Î¿Î¹ÏÎ±
+Î»Î¿Î¹ÏÎ¿Î½
+Î¼Î­Î½
+Î¼Î­ÏÎ±
+Î¼Î®
+Î¼Î®ÏÎµ
+Î¼Î¯Î±
+Î¼Î±
+Î¼Î±Î¶Î¹
+Î¼Î±ÎºÎ±ÏÎ¹
+Î¼Î±ÎºÏÏÎ±
+Î¼Î±Î»Î¹ÏÏÎ±
+Î¼Î±Î»Î»Î¿Î½
+Î¼Î±Ï
+Î¼Îµ
+Î¼ÎµÎ¸
+Î¼ÎµÎ¸Î±ÏÏÎ¹Î¿
+Î¼ÎµÎ¹Î¿Î½
+Î¼ÎµÎ»ÎµÎ¹
+Î¼ÎµÎ»Î»ÎµÏÎ±Î¹
+Î¼ÎµÎ¼Î¹Î±Ï
+Î¼ÎµÎ½
+Î¼ÎµÏÎ¹ÎºÎ±
+Î¼ÎµÏÎ¹ÎºÎµÏ
+Î¼ÎµÏÎ¹ÎºÎ¿Î¹
+Î¼ÎµÏÎ¹ÎºÎ¿ÏÏ
+Î¼ÎµÏÎ¹ÎºÏÎ½
+Î¼ÎµÏÎ±
+Î¼ÎµÏ
+Î¼ÎµÏÎ¬
+Î¼ÎµÏÎ±
+Î¼ÎµÏÎ±Î¾Ï
+Î¼ÎµÏá½°
+Î¼ÎµÏÏÎ¹
+Î¼Î·
+Î¼Î·Î½
+Î¼Î·ÏÏÏ
+Î¼Î·ÏÎµ
+Î¼Î·âÎµ
+Î¼Î¹Î¬
+Î¼Î¹Î±
+Î¼Î¹Î±Î½
+Î¼Î¹Î±Ï
+Î¼Î¿Î»Î¹Ï
+Î¼Î¿Î»Î¿Î½Î¿ÏÎ¹
+Î¼Î¿Î½Î±ÏÎ±
+Î¼Î¿Î½ÎµÏ
+Î¼Î¿Î½Î·
+Î¼Î¿Î½Î·Î½
+Î¼Î¿Î½Î·Ï
+Î¼Î¿Î½Î¿
+Î¼Î¿Î½Î¿Î¹
+Î¼Î¿Î½Î¿Î¼Î¹Î±Ï
+Î¼Î¿Î½Î¿Ï
+Î¼Î¿Î½Î¿Ï
+Î¼Î¿Î½Î¿ÏÏ
+Î¼Î¿Î½ÏÎ½
+Î¼Î¿Ï
+Î¼ÏÎ¿ÏÎµÎ¹
+Î¼ÏÎ¿ÏÎ¿ÏÎ½
+Î¼ÏÏÎ±Î²Î¿
+Î¼ÏÏÎ¿Ï
+Î¼á¼Î½
+Î¼á½²Î½
+Î¼á½´
+Î¼á½´Î½
+Î½Î±
+Î½Î±Î¹
+Î½ÏÏÎ¹Ï
+Î¾Î±Î½Î±
+Î¾Î±ÏÎ½Î¹ÎºÎ±
+Î¿
+Î¿Î¹
+Î¿Î»Î±
+Î¿Î»ÎµÏ
+Î¿Î»Î·
+Î¿Î»Î·Î½
+Î¿Î»Î·Ï
+Î¿Î»Î¿
+Î¿Î»Î¿Î³ÏÏÎ±
+Î¿Î»Î¿Î¹
+Î¿Î»Î¿Î½
+Î¿Î»Î¿Î½ÎµÎ½
+Î¿Î»Î¿Ï
+Î¿Î»Î¿ÏÎµÎ»Î±
+Î¿Î»Î¿Ï
+Î¿Î»Î¿ÏÏ
+Î¿Î»ÏÎ½
+Î¿Î»ÏÏ
+Î¿Î»ÏÏâÎ¹Î¿Î»Î¿Ï
+Î¿Î¼ÏÏ
+Î¿Î¼ÏÏ
+Î¿ÏÎ¿Î¹Î±
+Î¿ÏÎ¿Î¹Î±Î½
+Î¿ÏÎ¿Î¹Î±Î½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î±Ï
+Î¿ÏÎ¿Î¹Î±ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î±âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹ÎµÏ
+Î¿ÏÎ¿Î¹ÎµÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î¿
+Î¿ÏÎ¿Î¹Î¿Î¹
+Î¿ÏÎ¿Î¹Î¿Î½
+Î¿ÏÎ¿Î¹Î¿Î½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î¿Ï
+Î¿ÏÎ¿Î¹Î¿ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î¿Ï
+Î¿ÏÎ¿Î¹Î¿ÏÏ
+Î¿ÏÎ¿Î¹Î¿ÏÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î¿ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹Î¿âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹ÏÎ½
+Î¿ÏÎ¿Î¹ÏÎ½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î¹âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿ÏÎµ
+Î¿ÏÎ¿ÏÎµâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Ï
+Î¿ÏÎ¿ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÏÏ
+Î¿ÏÏÏ
+Î¿ÏÎ¹ÏÎ¼ÎµÎ½Î±
+Î¿ÏÎ¹ÏÎ¼ÎµÎ½ÎµÏ
+Î¿ÏÎ¹ÏÎ¼ÎµÎ½ÏÎ½
+Î¿ÏÎ¹ÏÎ¼ÎµÎ½ÏÏ
+Î¿ÏÎ±
+Î¿ÏÎ±âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎµÏ
+Î¿ÏÎµÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ·
+Î¿ÏÎ·Î½
+Î¿ÏÎ·Î½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ·Ï
+Î¿ÏÎ·ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ·âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿
+Î¿ÏÎ¿Î¹
+Î¿ÏÎ¿Î¹âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Î½
+Î¿ÏÎ¿Î½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Ï
+Î¿ÏÎ¿ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Ï
+Î¿ÏÎ¿ÏÏ
+Î¿ÏÎ¿ÏÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿ÏâÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÏÎ½
+Î¿ÏÏÎ½âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ±Î½
+Î¿ÏÎ¹
+Î¿ÏÎ¹âÎ·ÏÎ¿ÏÎµ
+Î¿ÏÎ¿Ï
+Î¿Ï
+Î¿ÏÏÎµ
+Î¿ÏâÎµ
+Î¿ÏÎ¹
+Î¿á¼±
+Î¿á¼³
+Î¿á¼·Ï
+Î¿á½
+Î¿á½Î´
+Î¿á½Î´Î­
+Î¿á½Î´ÎµÎ¯Ï
+Î¿á½Î´Îµá½¶Ï
+Î¿á½Î´á½²
+Î¿á½Î´á½²Î½
+Î¿á½Îº
+Î¿á½Ï
+Î¿á½Ïá½¶
+Î¿á½Ï
+Î¿á½ÏÎµ
+Î¿á½ÏÏ
+Î¿á½ÏÏÏ
+Î¿á½ÏÏÏ
+Î¿á½Î½
+Î¿á½
+Î¿á½ÏÎ¿Ï
+Î¿á½ÏÎ¿Ï
+ÏÎ±Î»Î¹
+ÏÎ±Î½ÏÎ¿ÏÎµ
+ÏÎ±Î½ÏÎ¿Ï
+ÏÎ±Î½ÏÏÏ
+ÏÎ±Ï
+ÏÎ±ÏÎ¬
+ÏÎ±ÏÎ±
+ÏÎ±Ïá½°
+ÏÎµÏÎ¯
+ÏÎµÏÎ±
+ÏÎµÏÎ¹
+ÏÎµÏÎ¹ÏÎ¿Ï
+ÏÎµÏÎ¹ÏÏÎ¿ÏÎµÏÎ¿
+ÏÎµÏÏÎ¹
+ÏÎµÏÏÏÎ¹
+ÏÎµÏá½¶
+ÏÎ¹Î±
+ÏÎ¹Î¸Î±Î½Î¿Î½
+ÏÎ¹Î¿
+ÏÎ¹ÏÏ
+ÏÎ»Î±Î¹
+ÏÎ»ÎµÎ¿Î½
+ÏÎ»Î·Î½
+ÏÎ¿Î¹Î±
+ÏÎ¿Î¹Î±Î½
+ÏÎ¿Î¹Î±Ï
+ÏÎ¿Î¹ÎµÏ
+ÏÎ¿Î¹ÎµÏ
+ÏÎ¿Î¹Î¿
+ÏÎ¿Î¹Î¿Î¹
+ÏÎ¿Î¹Î¿Î½
+ÏÎ¿Î¹Î¿Ï
+ÏÎ¿Î¹Î¿Ï
+ÏÎ¿Î¹Î¿Ï
+ÏÎ¿Î¹Î¿ÏÏ
+ÏÎ¿Î¹Î¿ÏÏ
+ÏÎ¿Î¹ÏÎ½
+ÏÎ¿Î»Ï
+ÏÎ¿ÏÎµÏ
+ÏÎ¿ÏÎ·
+ÏÎ¿ÏÎ·Î½
+ÏÎ¿ÏÎ·Ï
+ÏÎ¿ÏÎ¿Î¹
+ÏÎ¿ÏÎ¿Ï
+ÏÎ¿ÏÎ¿ÏÏ
+ÏÎ¿ÏÎµ
+ÏÎ¿Ï
+ÏÎ¿ÏÎ¸Îµ
+ÏÎ¿ÏÎ¸ÎµÎ½Î±
+ÏÎ¿á¿¦
+ÏÏÎµÏÎµÎ¹
+ÏÏÎ¹Î½
+ÏÏÎ¿
+ÏÏÎ¿ÎºÎµÎ¹Î¼ÎµÎ½Î¿Ï
+ÏÏÎ¿ÎºÎµÎ¹ÏÎ±Î¹
+ÏÏÎ¿ÏÎµÏÏÎ¹
+ÏÏÎ¿Ï
+ÏÏÎ¿Ï
+ÏÏÎ¿ÏÎ¿Ï
+ÏÏÎ¿ÏÎ¸ÎµÏ
+ÏÏÎ¿ÏÏÎµÏ
+ÏÏÏÏÏÏÎµÏÎ±
+ÏÏÏÏ
+ÏÏá½¸
+ÏÏá½¸Ï
+ÏÏÏ
+ÏÏÏ
+ÏÎ±Î½
+ÏÎ±Ï
+ÏÎµ
+ÏÎµÎ¹Ï
+ÏÎ·Î¼ÎµÏÎ±
+ÏÎ¹Î³Î±
+ÏÎ¿Ï
+ÏÏÎ±
+ÏÏÎ·
+ÏÏÎ·Î½
+ÏÏÎ·Ï
+ÏÏÎ¹Ï
+ÏÏÎ¿
+ÏÏÎ¿Î½
+ÏÏÎ¿Ï
+ÏÏÎ¿ÏÏ
+ÏÏÏÎ½
+ÏÏÎ³ÏÏÎ¿Î½ÏÏ
+ÏÏÎ½
+ÏÏÎ½Î±Î¼Î±
+ÏÏÎ½ÎµÏÏÏ
+ÏÏÎ½Î·Î¸ÏÏ
+ÏÏÏÎ½Î±
+ÏÏÏÎ½Î±Ï
+ÏÏÏÎ½ÎµÏ
+ÏÏÏÎ½Î·
+ÏÏÏÎ½Î·Î½
+ÏÏÏÎ½Î·Ï
+ÏÏÏÎ½Î¿
+ÏÏÏÎ½Î¿Î¹
+ÏÏÏÎ½Î¿Î½
+ÏÏÏÎ½Î¿Ï
+ÏÏÏÎ½Î¿Ï
+ÏÏÏÎ½Î¿ÏÏ
+ÏÏÏÎ½ÏÎ½
+ÏÏÏÎ½ÏÏ
+ÏÏÎµâÎ¿Î½
+ÏÏÏÏÎ±
+ÏÏÏ
+ÏÏ
+ÏÏÎ½
+Ïá½¸Ï
+Ïá½º
+Ïá½ºÎ½
+ÏÎ¬
+ÏÎ®Î½
+ÏÎ¯
+ÏÎ¯Ï
+ÏÎ¯Ï
+ÏÎ±
+ÏÎ±ÏÏÎ±
+ÏÎ±ÏÏÎµÏ
+ÏÎ±ÏÏÎ·
+ÏÎ±ÏÏÎ·Î½
+ÏÎ±ÏÏÎ·Ï
+ÏÎ±ÏÏÎ¿,ÏÎ±ÏÏÎ¿Î½
+ÏÎ±ÏÏÎ¿Ï
+ÏÎ±ÏÏÎ¿Ï
+ÏÎ±ÏÏÏÎ½
+ÏÎ±ÏÎ±
+ÏÎ±ÏÎ±ÏÎµ
+ÏÎ±á¿Ï
+ÏÎ±âÎµ
+ÏÎµ
+ÏÎµÎ»Î¹ÎºÎ±
+ÏÎµÎ»Î¹ÎºÏÏ
+ÏÎµÏ
+ÏÎµÏÎ¿Î¹Î±
+ÏÎµÏÎ¿Î¹Î±Î½
+ÏÎµÏÎ¿Î¹Î±Ï
+ÏÎµÏÎ¿Î¹ÎµÏ
+ÏÎµÏÎ¿Î¹Î¿
+ÏÎµÏÎ¿Î¹Î¿Î¹
+ÏÎµÏÎ¿Î¹Î¿Î½
+ÏÎµÏÎ¿Î¹Î¿Ï
+ÏÎµÏÎ¿Î¹Î¿Ï
+ÏÎµÏÎ¿Î¹Î¿ÏÏ
+ÏÎµÏÎ¿Î¹ÏÎ½
+ÏÎ·
+ÏÎ·Î½
+ÏÎ·Ï
+ÏÎ·Ï
+ÏÎ¹
+ÏÎ¹Î½Î±
+ÏÎ¹ÏÎ¿ÏÎ±
+ÏÎ¹ÏÎ¿ÏÎµ
+ÏÎ¹Ï
+ÏÎ¹Ï
+ÏÎ¿
+ÏÎ¿Î¯
+ÏÎ¿Î¹
+ÏÎ¿Î¹Î¿á¿¦ÏÎ¿Ï
+ÏÎ¿Î¹Î¿á¿¦ÏÎ¿Ï
+ÏÎ¿Î½
+ÏÎ¿Ï
+ÏÎ¿ÏÎ±
+ÏÎ¿ÏÎµÏ
+ÏÎ¿ÏÎ·
+ÏÎ¿ÏÎ·Î½
+ÏÎ¿ÏÎ·Ï
+ÏÎ¿ÏÎ¿
+ÏÎ¿ÏÎ¿Î¹
+ÏÎ¿ÏÎ¿Î½
+ÏÎ¿ÏÎ¿Ï
+ÏÎ¿ÏÎ¿Ï
+ÏÎ¿ÏÎ¿ÏÏ
+ÏÎ¿ÏÏÎ½
+ÏÎ¿ÏÎµ
+ÏÎ¿Ï
+ÏÎ¿ÏÎ»Î±ÏÎ¹ÏÏÎ¿
+ÏÎ¿ÏÎ»Î±ÏÎ¹ÏÏÎ¿Î½
+ÏÎ¿ÏÏ
+ÏÎ¿ÏÏÎ±
+ÏÎ¿ÏÏÎµÏ
+ÏÎ¿ÏÏÎ·
+ÏÎ¿ÏÏÎ·Î½
+ÏÎ¿ÏÏÎ·Ï
+ÏÎ¿ÏÏÎ¿
+ÏÎ¿ÏÏÎ¿Î¹
+ÏÎ¿ÏÏÎ¿Î¹Ï
+ÏÎ¿ÏÏÎ¿Î½
+ÏÎ¿ÏÏÎ¿Ï
+ÏÎ¿ÏÏÎ¿Ï
+ÏÎ¿ÏÏÎ¿ÏÏ
+ÏÎ¿ÏÏÏÎ½
+ÏÎ¿ÏÏ
+ÏÎ¿á½ºÏ
+ÏÎ¿á¿Ï
+ÏÎ¿á¿¦
+ÏÏÏÎ¿Î½
+ÏÏÎ½
+ÏÏÏÎ±
+ÏÏ
+ÏÏÎ½
+ÏÏÏÎµ
+Ïá½°
+Ïá½°Ï
+Ïá½´Î½
+Ïá½¸
+Ïá½¸Î½
+Ïá¿Ï
+Ïá¿Ï
+Ïá¿
+Ïá¿¶Î½
+Ïá¿·
+ÏÏ
+ÏÏÎµÏ
+ÏÏÎ¿
+ÏÏÎ¿ÏÎ·
+ÏÏÎ¿ÏÎ¹Î½
+ÏÏÏ
+ÏÏÏÎµÏÎ±
+ÏÎµÏÎ¿Ï
+ÏÎ±Î¼Î·Î»Î±
+ÏÎ¸ÎµÏ
+ÏÏÎµÏ
+ÏÏÏÎ¹Ï
+ÏÏÏÎ¹ÏÏÎ±
+ÏÎ·Î»Î±
+Ï
+ÏÏÎ±Î¹Î±
+ÏÏ
+ÏÏ
+ÏÏÎ±Î½
+ÏÏÎ¿ÏÎ¿Ï
+ÏÏÏÎ¿Ï
+ÏÏÏÎµ
+ÏÏÏÎ¿ÏÎ¿
+ÏÏ
+á¼Î»Î»'
+á¼Î»Î»Î¬
+á¼Î»Î»á½°
+á¼Î»Î»â
+á¼Ï
+á¼ÏÏ
+á¼Ïá½¸
+á¼Ï
+á¼Î½
+á¼
+á¼Î»Î»Î¿Ï
+á¼Î»Î»Î¿Ï
+á¼Î½
+á¼ÏÎ±
+á¼Î¼Î±
+á¼Î¬Î½
+á¼Î³Ï
+á¼Î³á½¼
+á¼Îº
+á¼Î¼ÏÏ
+á¼Î¼á½¸Ï
+á¼Î½
+á¼Î¾
+á¼ÏÎ¯
+á¼ÏÎµá½¶
+á¼Ïá½¶
+á¼ÏÏÎ¹
+á¼Ï
+á¼á½°Î½
+á¼Î±ÏÏÎ¿á¿¦
+á¼ÏÎ¹
+á¼¡
+á¼¢
+á¼£
+á¼¤
+á¼¥
+á¼§Ï
+á¼µÎ½Î±
+á½
+á½
+á½Î½
+á½Ï
+á½
+á½Î´Îµ
+á½Î¸ÎµÎ½
+á½ÏÎµÏ
+á½Ï
+á½Ï
+á½ÏÏÎ¹Ï
+á½ÏÏÎ¹Ï
+á½ÏÎµ
+á½ÏÎ¹
+á½Î¼ÏÏ
+á½Ï
+á½ÏÎ­Ï
+á½ÏÏ
+á½Ïá½²Ï
+á½Ïá½¸
+á½¡Ï
+á½¡Ï
+á½¥Ï
+á½¥ÏÏÎµ
+á½¦
+á¾§
+âÎ±
+âÎµ
+âÎµÎ¹Î½Î±
+âÎµÎ½
+âÎµÎ¾Î¹Î±
+âÎ·Î¸ÎµÎ½
+âÎ·Î»Î±âÎ·
+âÎ¹
+âÎ¹Î±
+âÎ¹Î±ÏÎºÏÏ
+âÎ¹ÎºÎ±
+âÎ¹ÎºÎ¿
+âÎ¹ÎºÎ¿Î¹
+âÎ¹ÎºÎ¿Ï
+âÎ¹ÎºÎ¿Ï
+âÎ¹ÎºÎ¿ÏÏ
+âÎ¹Î¿Î»Î¿Ï
+âÎ¹ÏÎ»Î±
+âÎ¹ÏÏÏ
\ No newline at end of file
diff --git a/static/stopwords/en b/static/stopwords/en
new file mode 100644
index 0000000..e095216
--- /dev/null
+++ b/static/stopwords/en
@@ -0,0 +1,1298 @@
+'ll
+'tis
+'twas
+'ve
+10
+39
+a
+a's
+able
+ableabout
+about
+above
+abroad
+abst
+accordance
+according
+accordingly
+across
+act
+actually
+ad
+added
+adj
+adopted
+ae
+af
+affected
+affecting
+affects
+after
+afterwards
+ag
+again
+against
+ago
+ah
+ahead
+ai
+ain't
+aint
+al
+all
+allow
+allows
+almost
+alone
+along
+alongside
+already
+also
+although
+always
+am
+amid
+amidst
+among
+amongst
+amoungst
+amount
+an
+and
+announce
+another
+any
+anybody
+anyhow
+anymore
+anyone
+anything
+anyway
+anyways
+anywhere
+ao
+apart
+apparently
+appear
+appreciate
+appropriate
+approximately
+aq
+ar
+are
+area
+areas
+aren
+aren't
+arent
+arise
+around
+arpa
+as
+aside
+ask
+asked
+asking
+asks
+associated
+at
+au
+auth
+available
+aw
+away
+awfully
+az
+b
+ba
+back
+backed
+backing
+backs
+backward
+backwards
+bb
+bd
+be
+became
+because
+become
+becomes
+becoming
+been
+before
+beforehand
+began
+begin
+beginning
+beginnings
+begins
+behind
+being
+beings
+believe
+below
+beside
+besides
+best
+better
+between
+beyond
+bf
+bg
+bh
+bi
+big
+bill
+billion
+biol
+bj
+bm
+bn
+bo
+both
+bottom
+br
+brief
+briefly
+bs
+bt
+but
+buy
+bv
+bw
+by
+bz
+c
+c'mon
+c's
+ca
+call
+came
+can
+can't
+cannot
+cant
+caption
+case
+cases
+cause
+causes
+cc
+cd
+certain
+certainly
+cf
+cg
+ch
+changes
+ci
+ck
+cl
+clear
+clearly
+click
+cm
+cmon
+cn
+co
+co.
+com
+come
+comes
+computer
+con
+concerning
+consequently
+consider
+considering
+contain
+containing
+contains
+copy
+corresponding
+could
+could've
+couldn
+couldn't
+couldnt
+course
+cr
+cry
+cs
+cu
+currently
+cv
+cx
+cy
+cz
+d
+dare
+daren't
+darent
+date
+de
+dear
+definitely
+describe
+described
+despite
+detail
+did
+didn
+didn't
+didnt
+differ
+different
+differently
+directly
+dj
+dk
+dm
+do
+does
+doesn
+doesn't
+doesnt
+doing
+don
+don't
+done
+dont
+doubtful
+down
+downed
+downing
+downs
+downwards
+due
+during
+dz
+e
+each
+early
+ec
+ed
+edu
+ee
+effect
+eg
+eh
+eight
+eighty
+either
+eleven
+else
+elsewhere
+empty
+end
+ended
+ending
+ends
+enough
+entirely
+er
+es
+especially
+et
+et-al
+etc
+even
+evenly
+ever
+evermore
+every
+everybody
+everyone
+everything
+everywhere
+ex
+exactly
+example
+except
+f
+face
+faces
+fact
+facts
+fairly
+far
+farther
+felt
+few
+fewer
+ff
+fi
+fifteen
+fifth
+fifty
+fify
+fill
+find
+finds
+fire
+first
+five
+fix
+fj
+fk
+fm
+fo
+followed
+following
+follows
+for
+forever
+former
+formerly
+forth
+forty
+forward
+found
+four
+fr
+free
+from
+front
+full
+fully
+further
+furthered
+furthering
+furthermore
+furthers
+fx
+g
+ga
+gave
+gb
+gd
+ge
+general
+generally
+get
+gets
+getting
+gf
+gg
+gh
+gi
+give
+given
+gives
+giving
+gl
+gm
+gmt
+gn
+go
+goes
+going
+gone
+good
+goods
+got
+gotten
+gov
+gp
+gq
+gr
+great
+greater
+greatest
+greetings
+group
+grouped
+grouping
+groups
+gs
+gt
+gu
+gw
+gy
+h
+had
+hadn't
+hadnt
+half
+happens
+hardly
+has
+hasn
+hasn't
+hasnt
+have
+haven
+haven't
+havent
+having
+he
+he'd
+he'll
+he's
+hed
+hell
+hello
+help
+hence
+her
+here
+here's
+hereafter
+hereby
+herein
+heres
+hereupon
+hers
+herself
+herseâ
+hes
+hi
+hid
+high
+higher
+highest
+him
+himself
+himseâ
+his
+hither
+hk
+hm
+hn
+home
+homepage
+hopefully
+how
+how'd
+how'll
+how's
+howbeit
+however
+hr
+ht
+htm
+html
+http
+hu
+hundred
+i
+i'd
+i'll
+i'm
+i've
+i.e.
+id
+ie
+if
+ignored
+ii
+il
+ill
+im
+immediate
+immediately
+importance
+important
+in
+inasmuch
+inc
+inc.
+indeed
+index
+indicate
+indicated
+indicates
+information
+inner
+inside
+insofar
+instead
+int
+interest
+interested
+interesting
+interests
+into
+invention
+inward
+io
+iq
+ir
+is
+isn
+isn't
+isnt
+it
+it'd
+it'll
+it's
+itd
+itll
+its
+itself
+itseâ
+ive
+j
+je
+jm
+jo
+join
+jp
+just
+k
+ke
+keep
+keeps
+kept
+keys
+kg
+kh
+ki
+kind
+km
+kn
+knew
+know
+known
+knows
+kp
+kr
+kw
+ky
+kz
+l
+la
+large
+largely
+last
+lately
+later
+latest
+latter
+latterly
+lb
+lc
+least
+length
+less
+lest
+let
+let's
+lets
+li
+like
+liked
+likely
+likewise
+line
+little
+lk
+ll
+long
+longer
+longest
+look
+looking
+looks
+low
+lower
+lr
+ls
+lt
+ltd
+lu
+lv
+ly
+m
+ma
+made
+mainly
+make
+makes
+making
+man
+many
+may
+maybe
+mayn't
+maynt
+mc
+md
+me
+mean
+means
+meantime
+meanwhile
+member
+members
+men
+merely
+mg
+mh
+microsoft
+might
+might've
+mightn't
+mightnt
+mil
+mill
+million
+mine
+minus
+miss
+mk
+ml
+mm
+mn
+mo
+more
+moreover
+most
+mostly
+move
+mp
+mq
+mr
+mrs
+ms
+msie
+mt
+mu
+much
+mug
+must
+must've
+mustn't
+mustnt
+mv
+mw
+mx
+my
+myself
+myseâ
+mz
+n
+na
+name
+namely
+nay
+nc
+nd
+ne
+near
+nearly
+necessarily
+necessary
+need
+needed
+needing
+needn't
+neednt
+needs
+neither
+net
+netscape
+never
+neverf
+neverless
+nevertheless
+new
+newer
+newest
+next
+nf
+ng
+ni
+nine
+ninety
+nl
+no
+no-one
+nobody
+non
+none
+nonetheless
+noone
+nor
+normally
+nos
+not
+noted
+nothing
+notwithstanding
+novel
+now
+nowhere
+np
+nr
+nu
+null
+number
+numbers
+nz
+o
+obtain
+obtained
+obviously
+of
+off
+often
+oh
+ok
+okay
+old
+older
+oldest
+om
+omitted
+on
+once
+one
+one's
+ones
+only
+onto
+open
+opened
+opening
+opens
+opposite
+or
+ord
+order
+ordered
+ordering
+orders
+org
+other
+others
+otherwise
+ought
+oughtn't
+oughtnt
+our
+ours
+ourselves
+out
+outside
+over
+overall
+owing
+own
+p
+pa
+page
+pages
+part
+parted
+particular
+particularly
+parting
+parts
+past
+pe
+per
+perhaps
+pf
+pg
+ph
+pk
+pl
+place
+placed
+places
+please
+plus
+pm
+pmid
+pn
+point
+pointed
+pointing
+points
+poorly
+possible
+possibly
+potentially
+pp
+pr
+predominantly
+present
+presented
+presenting
+presents
+presumably
+previously
+primarily
+probably
+problem
+problems
+promptly
+proud
+provided
+provides
+pt
+put
+puts
+pw
+py
+q
+qa
+que
+quickly
+quite
+qv
+r
+ran
+rather
+rd
+re
+readily
+really
+reasonably
+recent
+recently
+ref
+refs
+regarding
+regardless
+regards
+related
+relatively
+research
+reserved
+respectively
+resulted
+resulting
+results
+right
+ring
+ro
+room
+rooms
+round
+ru
+run
+rw
+s
+sa
+said
+same
+saw
+say
+saying
+says
+sb
+sc
+sd
+se
+sec
+second
+secondly
+seconds
+section
+see
+seeing
+seem
+seemed
+seeming
+seems
+seen
+sees
+self
+selves
+sensible
+sent
+serious
+seriously
+seven
+seventy
+several
+sg
+sh
+shall
+shan't
+shant
+she
+she'd
+she'll
+she's
+shed
+shell
+shes
+should
+should've
+shouldn
+shouldn't
+shouldnt
+show
+showed
+showing
+shown
+showns
+shows
+si
+side
+sides
+significant
+significantly
+similar
+similarly
+since
+sincere
+site
+six
+sixty
+sj
+sk
+sl
+slightly
+sm
+small
+smaller
+smallest
+sn
+so
+some
+somebody
+someday
+somehow
+someone
+somethan
+something
+sometime
+sometimes
+somewhat
+somewhere
+soon
+sorry
+specifically
+specified
+specify
+specifying
+sr
+st
+state
+states
+still
+stop
+strongly
+su
+sub
+substantially
+successfully
+such
+sufficiently
+suggest
+sup
+sure
+sv
+sy
+system
+sz
+t
+t's
+take
+taken
+taking
+tc
+td
+tell
+ten
+tends
+test
+text
+tf
+tg
+th
+than
+thank
+thanks
+thanx
+that
+that'll
+that's
+that've
+thatll
+thats
+thatve
+the
+their
+theirs
+them
+themselves
+then
+thence
+there
+there'd
+there'll
+there're
+there's
+there've
+thereafter
+thereby
+thered
+therefore
+therein
+therell
+thereof
+therere
+theres
+thereto
+thereupon
+thereve
+these
+they
+they'd
+they'll
+they're
+they've
+theyd
+theyll
+theyre
+theyve
+thick
+thin
+thing
+things
+think
+thinks
+third
+thirty
+this
+thorough
+thoroughly
+those
+thou
+though
+thoughh
+thought
+thoughts
+thousand
+three
+throug
+through
+throughout
+thru
+thus
+til
+till
+tip
+tis
+tj
+tk
+tm
+tn
+to
+today
+together
+too
+took
+top
+toward
+towards
+tp
+tr
+tried
+tries
+trillion
+truly
+try
+trying
+ts
+tt
+turn
+turned
+turning
+turns
+tv
+tw
+twas
+twelve
+twenty
+twice
+two
+tz
+u
+ua
+ug
+uk
+um
+un
+under
+underneath
+undoing
+unfortunately
+unless
+unlike
+unlikely
+until
+unto
+up
+upon
+ups
+upwards
+us
+use
+used
+useful
+usefully
+usefulness
+uses
+using
+usually
+uucp
+uy
+uz
+v
+va
+value
+various
+vc
+ve
+versus
+very
+vg
+vi
+via
+viz
+vn
+vol
+vols
+vs
+vu
+w
+want
+wanted
+wanting
+wants
+was
+wasn
+wasn't
+wasnt
+way
+ways
+we
+we'd
+we'll
+we're
+we've
+web
+webpage
+website
+wed
+welcome
+well
+wells
+went
+were
+weren
+weren't
+werent
+weve
+wf
+what
+what'd
+what'll
+what's
+what've
+whatever
+whatll
+whats
+whatve
+when
+when'd
+when'll
+when's
+whence
+whenever
+where
+where'd
+where'll
+where's
+whereafter
+whereas
+whereby
+wherein
+wheres
+whereupon
+wherever
+whether
+which
+whichever
+while
+whilst
+whim
+whither
+who
+who'd
+who'll
+who's
+whod
+whoever
+whole
+wholl
+whom
+whomever
+whos
+whose
+why
+why'd
+why'll
+why's
+widely
+width
+will
+willing
+wish
+with
+within
+without
+won
+won't
+wonder
+wont
+words
+work
+worked
+working
+works
+world
+would
+would've
+wouldn
+wouldn't
+wouldnt
+ws
+www
+x
+y
+ye
+year
+years
+yes
+yet
+you
+you'd
+you'll
+you're
+you've
+youd
+youll
+young
+younger
+youngest
+your
+youre
+yours
+yourself
+yourselves
+youve
+yt
+yu
+z
+za
+zero
+zm
+zr
\ No newline at end of file
diff --git a/static/stopwords/eo b/static/stopwords/eo
new file mode 100644
index 0000000..bb209f3
--- /dev/null
+++ b/static/stopwords/eo
@@ -0,0 +1,173 @@
+adiaÅ­
+ajn
+al
+ankoraÅ­
+antaÅ­
+aÅ­
+bonan
+bonvole
+bonvolu
+bv
+ci
+cia
+cian
+cin
+d-ro
+da
+de
+dek
+deka
+do
+doktor'
+doktoro
+du
+dua
+dum
+eble
+ekz
+ekzemple
+en
+estas
+estis
+estos
+estu
+estus
+eÄ
+f-no
+feliÄan
+for
+fraÅ­lino
+ha
+havas
+havis
+havos
+havu
+havus
+he
+ho
+hu
+ili
+ilia
+ilian
+ilin
+inter
+io
+ion
+iu
+iujn
+iun
+ja
+jam
+je
+jes
+k
+kaj
+ke
+kio
+kion
+kiu
+kiujn
+kiun
+kvankam
+kvar
+kvara
+kvazaÅ­
+kvin
+kvina
+la
+li
+lia
+lian
+lin
+malantaÅ­
+male
+malgraÅ­
+mem
+mi
+mia
+mian
+min
+minus
+naÅ­
+naÅ­a
+ne
+nek
+nenio
+nenion
+neniu
+neniun
+nepre
+ni
+nia
+nian
+nin
+nu
+nun
+nur
+ok
+oka
+oni
+onia
+onian
+onin
+plej
+pli
+plu
+plus
+por
+post
+preter
+s-no
+s-ro
+se
+sed
+sep
+sepa
+ses
+sesa
+si
+sia
+sian
+sin
+sinjor'
+sinjorino
+sinjoro
+sub
+super
+supren
+sur
+tamen
+tio
+tion
+tiu
+tiujn
+tiun
+tra
+tri
+tria
+tuj
+tute
+unu
+unua
+ve
+verÅajne
+vi
+via
+vian
+vin
+Äi
+Äio
+Äion
+Äiu
+Äiujn
+Äiun
+Äu
+Äi
+Äia
+Äian
+Äin
+Äis
+Äµus
+Åi
+Åia
+Åin
\ No newline at end of file
diff --git a/static/stopwords/es b/static/stopwords/es
new file mode 100644
index 0000000..0cf607d
--- /dev/null
+++ b/static/stopwords/es
@@ -0,0 +1,732 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+_
+a
+actualmente
+acuerdo
+adelante
+ademas
+ademÃ¡s
+adrede
+afirmÃ³
+agregÃ³
+ahi
+ahora
+ahÃ­
+al
+algo
+alguna
+algunas
+alguno
+algunos
+algÃºn
+alli
+allÃ­
+alrededor
+ambos
+ampleamos
+antano
+antaÃ±o
+ante
+anterior
+antes
+apenas
+aproximadamente
+aquel
+aquella
+aquellas
+aquello
+aquellos
+aqui
+aquÃ©l
+aquÃ©lla
+aquÃ©llas
+aquÃ©llos
+aquÃ­
+arriba
+arribaabajo
+asegurÃ³
+asi
+asÃ­
+atras
+aun
+aunque
+ayer
+aÃ±adiÃ³
+aÃºn
+b
+bajo
+bastante
+bien
+breve
+buen
+buena
+buenas
+bueno
+buenos
+c
+cada
+casi
+cerca
+cierta
+ciertas
+cierto
+ciertos
+cinco
+claro
+comentÃ³
+como
+con
+conmigo
+conocer
+conseguimos
+conseguir
+considera
+considerÃ³
+consigo
+consigue
+consiguen
+consigues
+contigo
+contra
+cosas
+creo
+cual
+cuales
+cualquier
+cuando
+cuanta
+cuantas
+cuanto
+cuantos
+cuatro
+cuenta
+cuÃ¡l
+cuÃ¡les
+cuÃ¡ndo
+cuÃ¡nta
+cuÃ¡ntas
+cuÃ¡nto
+cuÃ¡ntos
+cÃ³mo
+d
+da
+dado
+dan
+dar
+de
+debajo
+debe
+deben
+debido
+decir
+dejÃ³
+del
+delante
+demasiado
+demÃ¡s
+dentro
+deprisa
+desde
+despacio
+despues
+despuÃ©s
+detras
+detrÃ¡s
+dia
+dias
+dice
+dicen
+dicho
+dieron
+diferente
+diferentes
+dijeron
+dijo
+dio
+donde
+dos
+durante
+dÃ­a
+dÃ­as
+dÃ³nde
+e
+ejemplo
+el
+ella
+ellas
+ello
+ellos
+embargo
+empleais
+emplean
+emplear
+empleas
+empleo
+en
+encima
+encuentra
+enfrente
+enseguida
+entonces
+entre
+era
+erais
+eramos
+eran
+eras
+eres
+es
+esa
+esas
+ese
+eso
+esos
+esta
+estaba
+estabais
+estaban
+estabas
+estad
+estada
+estadas
+estado
+estados
+estais
+estamos
+estan
+estando
+estar
+estaremos
+estarÃ¡
+estarÃ¡n
+estarÃ¡s
+estarÃ©
+estarÃ©is
+estarÃ­a
+estarÃ­ais
+estarÃ­amos
+estarÃ­an
+estarÃ­as
+estas
+este
+estemos
+esto
+estos
+estoy
+estuve
+estuviera
+estuvierais
+estuvieran
+estuvieras
+estuvieron
+estuviese
+estuvieseis
+estuviesen
+estuvieses
+estuvimos
+estuviste
+estuvisteis
+estuviÃ©ramos
+estuviÃ©semos
+estuvo
+estÃ¡
+estÃ¡bamos
+estÃ¡is
+estÃ¡n
+estÃ¡s
+estÃ©
+estÃ©is
+estÃ©n
+estÃ©s
+ex
+excepto
+existe
+existen
+explicÃ³
+expresÃ³
+f
+fin
+final
+fue
+fuera
+fuerais
+fueran
+fueras
+fueron
+fuese
+fueseis
+fuesen
+fueses
+fui
+fuimos
+fuiste
+fuisteis
+fuÃ©ramos
+fuÃ©semos
+g
+general
+gran
+grandes
+gueno
+h
+ha
+haber
+habia
+habida
+habidas
+habido
+habidos
+habiendo
+habla
+hablan
+habremos
+habrÃ¡
+habrÃ¡n
+habrÃ¡s
+habrÃ©
+habrÃ©is
+habrÃ­a
+habrÃ­ais
+habrÃ­amos
+habrÃ­an
+habrÃ­as
+habÃ©is
+habÃ­a
+habÃ­ais
+habÃ­amos
+habÃ­an
+habÃ­as
+hace
+haceis
+hacemos
+hacen
+hacer
+hacerlo
+haces
+hacia
+haciendo
+hago
+han
+has
+hasta
+hay
+haya
+hayamos
+hayan
+hayas
+hayÃ¡is
+he
+hecho
+hemos
+hicieron
+hizo
+horas
+hoy
+hube
+hubiera
+hubierais
+hubieran
+hubieras
+hubieron
+hubiese
+hubieseis
+hubiesen
+hubieses
+hubimos
+hubiste
+hubisteis
+hubiÃ©ramos
+hubiÃ©semos
+hubo
+i
+igual
+incluso
+indicÃ³
+informo
+informÃ³
+intenta
+intentais
+intentamos
+intentan
+intentar
+intentas
+intento
+ir
+j
+junto
+k
+l
+la
+lado
+largo
+las
+le
+lejos
+les
+llegÃ³
+lleva
+llevar
+lo
+los
+luego
+lugar
+m
+mal
+manera
+manifestÃ³
+mas
+mayor
+me
+mediante
+medio
+mejor
+mencionÃ³
+menos
+menudo
+mi
+mia
+mias
+mientras
+mio
+mios
+mis
+misma
+mismas
+mismo
+mismos
+modo
+momento
+mucha
+muchas
+mucho
+muchos
+muy
+mÃ¡s
+mÃ­
+mÃ­a
+mÃ­as
+mÃ­o
+mÃ­os
+n
+nada
+nadie
+ni
+ninguna
+ningunas
+ninguno
+ningunos
+ningÃºn
+no
+nos
+nosotras
+nosotros
+nuestra
+nuestras
+nuestro
+nuestros
+nueva
+nuevas
+nuevo
+nuevos
+nunca
+o
+ocho
+os
+otra
+otras
+otro
+otros
+p
+pais
+para
+parece
+parte
+partir
+pasada
+pasado
+paÃ¬s
+peor
+pero
+pesar
+poca
+pocas
+poco
+pocos
+podeis
+podemos
+poder
+podria
+podriais
+podriamos
+podrian
+podrias
+podrÃ¡
+podrÃ¡n
+podrÃ­a
+podrÃ­an
+poner
+por
+por quÃ©
+porque
+posible
+primer
+primera
+primero
+primeros
+principalmente
+pronto
+propia
+propias
+propio
+propios
+proximo
+prÃ³ximo
+prÃ³ximos
+pudo
+pueda
+puede
+pueden
+puedo
+pues
+q
+qeu
+que
+quedÃ³
+queremos
+quien
+quienes
+quiere
+quiza
+quizas
+quizÃ¡
+quizÃ¡s
+quiÃ©n
+quiÃ©nes
+quÃ©
+r
+raras
+realizado
+realizar
+realizÃ³
+repente
+respecto
+s
+sabe
+sabeis
+sabemos
+saben
+saber
+sabes
+sal
+salvo
+se
+sea
+seamos
+sean
+seas
+segun
+segunda
+segundo
+segÃºn
+seis
+ser
+sera
+seremos
+serÃ¡
+serÃ¡n
+serÃ¡s
+serÃ©
+serÃ©is
+serÃ­a
+serÃ­ais
+serÃ­amos
+serÃ­an
+serÃ­as
+seÃ¡is
+seÃ±alÃ³
+si
+sido
+siempre
+siendo
+siete
+sigue
+siguiente
+sin
+sino
+sobre
+sois
+sola
+solamente
+solas
+solo
+solos
+somos
+son
+soy
+soyos
+su
+supuesto
+sus
+suya
+suyas
+suyo
+suyos
+sÃ©
+sÃ­
+sÃ³lo
+t
+tal
+tambien
+tambiÃ©n
+tampoco
+tan
+tanto
+tarde
+te
+temprano
+tendremos
+tendrÃ¡
+tendrÃ¡n
+tendrÃ¡s
+tendrÃ©
+tendrÃ©is
+tendrÃ­a
+tendrÃ­ais
+tendrÃ­amos
+tendrÃ­an
+tendrÃ­as
+tened
+teneis
+tenemos
+tener
+tenga
+tengamos
+tengan
+tengas
+tengo
+tengÃ¡is
+tenida
+tenidas
+tenido
+tenidos
+teniendo
+tenÃ©is
+tenÃ­a
+tenÃ­ais
+tenÃ­amos
+tenÃ­an
+tenÃ­as
+tercera
+ti
+tiempo
+tiene
+tienen
+tienes
+toda
+todas
+todavia
+todavÃ­a
+todo
+todos
+total
+trabaja
+trabajais
+trabajamos
+trabajan
+trabajar
+trabajas
+trabajo
+tras
+trata
+travÃ©s
+tres
+tu
+tus
+tuve
+tuviera
+tuvierais
+tuvieran
+tuvieras
+tuvieron
+tuviese
+tuvieseis
+tuviesen
+tuvieses
+tuvimos
+tuviste
+tuvisteis
+tuviÃ©ramos
+tuviÃ©semos
+tuvo
+tuya
+tuyas
+tuyo
+tuyos
+tÃº
+u
+ultimo
+un
+una
+unas
+uno
+unos
+usa
+usais
+usamos
+usan
+usar
+usas
+uso
+usted
+ustedes
+v
+va
+vais
+valor
+vamos
+van
+varias
+varios
+vaya
+veces
+ver
+verdad
+verdadera
+verdadero
+vez
+vosotras
+vosotros
+voy
+vuestra
+vuestras
+vuestro
+vuestros
+w
+x
+y
+ya
+yo
+z
+Ã©l
+Ã©ramos
+Ã©sa
+Ã©sas
+Ã©se
+Ã©sos
+Ã©sta
+Ã©stas
+Ã©ste
+Ã©stos
+Ãºltima
+Ãºltimas
+Ãºltimo
+Ãºltimos
\ No newline at end of file
diff --git a/static/stopwords/et b/static/stopwords/et
new file mode 100644
index 0000000..0914094
--- /dev/null
+++ b/static/stopwords/et
@@ -0,0 +1,35 @@
+aga
+ei
+et
+ja
+jah
+kas
+kui
+kÃµik
+ma
+me
+mida
+midagi
+mind
+minu
+mis
+mu
+mul
+mulle
+nad
+nii
+oled
+olen
+oli
+oma
+on
+pole
+sa
+seda
+see
+selle
+siin
+siis
+ta
+te
+Ã¤ra
\ No newline at end of file
diff --git a/static/stopwords/eu b/static/stopwords/eu
new file mode 100644
index 0000000..ded509c
--- /dev/null
+++ b/static/stopwords/eu
@@ -0,0 +1,98 @@
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten
\ No newline at end of file
diff --git a/static/stopwords/fa b/static/stopwords/fa
new file mode 100644
index 0000000..4df893c
--- /dev/null
+++ b/static/stopwords/fa
@@ -0,0 +1,799 @@
+!
+,
+.
+:
+;
+Ø
+Ø
+Ø
+Ø¢Ø¨Ø§Ø¯
+Ø¢Ø±Ù
+Ø¢Ø±Û
+Ø¢ÙØ¯
+Ø¢ÙØ¯Ù
+Ø¢Ù
+Ø¢ÙØ§Ù
+Ø¢ÙØ¬Ø§
+Ø¢ÙØ·ÙØ±
+Ø¢ÙÙØ¯Ø±
+Ø¢ÙÙÙ
+Ø¢ÙÙØ§
+Ø¢ÙÚÙ
+Ø¢ÙÚ©Ù
+Ø¢ÙØ±Ø¯
+Ø¢ÙØ±Ø¯Ù
+Ø¢ÙØ¯
+Ø¢Û
+Ø¢ÛØ§
+Ø¢ÛÙØ¯
+Ø§ØªÙØ§ÙØ§
+Ø§Ø«Ø±Ù
+Ø§Ø­ØªØ±Ø§ÙØ§
+Ø§Ø­ØªÙØ§ÙØ§
+Ø§Ø®ÛØ±
+Ø§Ø±Û
+Ø§Ø²
+Ø§Ø²Ø¬ÙÙÙ
+Ø§Ø³Ø§Ø³Ø§
+Ø§Ø³Øª
+Ø§Ø³ØªÙØ§Ø¯
+Ø§Ø³ØªÙØ§Ø¯Ù
+Ø§Ø´
+Ø§Ø´Ú©Ø§Ø±Ø§
+Ø§ØµÙØ§
+Ø§ØµÙÙØ§
+Ø§Ø¹ÙØ§Ù
+Ø§ØºÙØ¨
+Ø§ÙÙÙÙ
+Ø§ÙØ§Ù
+Ø§ÙØ¨ØªÙ
+Ø§ÙØ¨ØªÙÙ
+Ø§Ù
+Ø§ÙØ§
+Ø§ÙØ±ÙØ²
+Ø§ÙØ±ÙØ²Ù
+Ø§ÙØ³Ø§Ù
+Ø§ÙØ´Ø¨
+Ø§ÙÙØ±
+Ø§Ù
+Ø§ÙØ¬Ø§Ù
+Ø§ÙØ¯
+Ø§ÙØ´Ø§Ø§ÙÙÙ
+Ø§ÙØµØ§ÙØ§
+Ø§ÙØ·ÙØ±
+Ø§ÙÙØ¯Ø±
+Ø§ÙÙØ§
+Ø§ÙÚÙØ§Ù
+Ø§ÙÚ©Ù
+Ø§ÙÚ¯Ø§Ø±
+Ø§Ù
+Ø§ÙÙ
+Ø§ÙÙØ§
+Ø§Ù
+Ø§ÙØ´Ø§Ù
+Ø§ÙÙ
+Ø§ÙÙ
+Ø§ÙÙÙÙ
+Ø§Ú©Ø«Ø±Ø§
+Ø§Ú©ÙÙÙ
+Ø§Ú¯Ø±
+Ø§Û
+Ø§ÛØ§
+Ø§ÛØ¯
+Ø§ÛØ´Ø§Ù
+Ø§ÛÙ
+Ø§ÛÙ
+Ø§ÛÙØ¬Ø§
+Ø§ÛÙØ¯
+Ø§ÛÙØ·ÙØ±
+Ø§ÛÙÙØ¯Ø±
+Ø§ÛÙÙØ§
+Ø§ÛÙÚÙÛÙ
+Ø§ÛÙÚ©
+Ø§ÛÙÚ©Ù
+Ø§ÛÙÚ¯ÙÙÙ
+Ø¨Ø§
+Ø¨Ø§Ø±
+Ø¨Ø§Ø±Ø©
+Ø¨Ø§Ø±Ù
+Ø¨Ø§Ø±ÙØ§
+Ø¨Ø§Ø²
+Ø¨Ø§Ø²ÙÙ
+Ø¨Ø§Ø´
+Ø¨Ø§Ø´Ø¯
+Ø¨Ø§Ø´Ù
+Ø¨Ø§Ø´ÙØ¯
+Ø¨Ø§Ø´ÙÙ
+Ø¨Ø§Ø´Û
+Ø¨Ø§Ø´ÛØ¯
+Ø¨Ø§Ø´ÛÙ
+Ø¨Ø§ÙØ§
+Ø¨Ø§ÙØ§Ø®Ø±Ù
+Ø¨Ø§ÙØ§ÛÙ
+Ø¨Ø§ÙØ·Ø¨Ø¹
+Ø¨Ø§ÙØ¯
+Ø¨Ø§ÛØ¯
+Ø¨ØªÙØ§Ù
+Ø¨ØªÙØ§ÙØ¯
+Ø¨ØªÙØ§ÙÛ
+Ø¨ØªÙØ§ÙÛÙ
+Ø¨Ø®Ø´
+Ø¨Ø®Ø´Û
+Ø¨Ø®ÙØ§Ù
+Ø¨Ø®ÙØ§ÙØ¯
+Ø¨Ø®ÙØ§ÙÙ
+Ø¨Ø®ÙØ§ÙÙØ¯
+Ø¨Ø®ÙØ§ÙÛ
+Ø¨Ø®ÙØ§ÙÛØ¯
+Ø¨Ø®ÙØ§ÙÛÙ
+Ø¨Ø¯
+Ø¨Ø¯ÙÙ
+Ø¨Ø±
+Ø¨Ø±Ø§Ø¨Ø±
+Ø¨Ø±Ø§Ø¨Ø±Ù
+Ø¨Ø±Ø§Ø­ØªÛ
+Ø¨Ø±Ø§Ø³Ø§Ø³
+Ø¨Ø±Ø§Ø³ØªÛ
+Ø¨Ø±Ø§Ù
+Ø¨Ø±Ø§Û
+Ø¨Ø±Ø§ÛÙ
+Ø¨Ø±Ø®ÙØ±Ø¯Ø§Ø±
+Ø¨Ø±Ø®Ù
+Ø¨Ø±Ø®Û
+Ø¨Ø±Ø¯Ø§Ø±Ù
+Ø¨Ø±Ø¹Ú©Ø³
+Ø¨Ø±ÙØ²
+Ø¨Ø²Ø±Ú¯
+Ø¨Ø²ÙØ¯Û
+Ø¨Ø³Ø§
+Ø¨Ø³ÙØ§Ø±
+Ø¨Ø³ÙØ§Ø±Ù
+Ø¨Ø³ÛØ§Ø±
+Ø¨Ø³ÛØ§Ø±Û
+Ø¨Ø·ÙØ±
+Ø¨Ø¹Ø¯
+Ø¨Ø¹Ø¯Ø§
+Ø¨Ø¹Ø¯ÙØ§
+Ø¨Ø¹Ø±Û
+Ø¨Ø¹Ø¶Ø§
+Ø¨Ø¹Ø¶Ù
+Ø¨ÙØ§ÙØ§ØµÙÙ
+Ø¨ÙÙÙ
+Ø¨ÙÙ
+Ø¨ÙÚ©Ù
+Ø¨ÙÛ
+Ø¨ÙØ§Ø¨Ø±Ø§ÙÙ
+Ø¨ÙØ§Ø¨Ø±Ø§ÛÙ
+Ø¨ÙØ¯Ù
+Ø¨Ù
+Ø¨ÙØªØ±
+Ø¨ÙØªØ±ÙÙ
+Ø¨ÙØ¯
+Ø¨ÙØ¯Ù
+Ø¨ÙØ¯Ù
+Ø¨ÙØ¯ÙØ¯
+Ø¨ÙØ¯Ù
+Ø¨ÙØ¯Û
+Ø¨ÙØ¯ÛØ¯
+Ø¨ÙØ¯ÛÙ
+Ø¨ÙÛÚÙ
+Ø¨Ù
+Ø¨ÙØ³Øª
+Ø¨ÙØ´
+Ø¨ÙØ´ØªØ±
+Ø¨ÙØ´ØªØ±Ù
+Ø¨ÙÙ
+Ø¨Ú©Ù
+Ø¨Ú©ÙØ¯
+Ø¨Ú©ÙÙ
+Ø¨Ú©ÙÙØ¯
+Ø¨Ú©ÙÛ
+Ø¨Ú©ÙÛØ¯
+Ø¨Ú©ÙÛÙ
+Ø¨Ú¯Ù
+Ø¨Ú¯ÙÛØ¯
+Ø¨Ú¯ÙÛÙ
+Ø¨Ú¯ÙÛÙØ¯
+Ø¨Ú¯ÙÛÛ
+Ø¨Ú¯ÙÛÛØ¯
+Ø¨Ú¯ÙÛÛÙ
+Ø¨Ú¯ÛØ±
+Ø¨Ú¯ÛØ±Ø¯
+Ø¨Ú¯ÛØ±Ù
+Ø¨Ú¯ÛØ±ÙØ¯
+Ø¨Ú¯ÛØ±Û
+Ø¨Ú¯ÛØ±ÛØ¯
+Ø¨Ú¯ÛØ±ÛÙ
+Ø¨Û
+Ø¨ÛØ§
+Ø¨ÛØ§Ø¨
+Ø¨ÛØ§Ø¨Ø¯
+Ø¨ÛØ§Ø¨Ù
+Ø¨ÛØ§Ø¨ÙØ¯
+Ø¨ÛØ§Ø¨Û
+Ø¨ÛØ§Ø¨ÛØ¯
+Ø¨ÛØ§Ø¨ÛÙ
+Ø¨ÛØ§ÙØ±
+Ø¨ÛØ§ÙØ±Ø¯
+Ø¨ÛØ§ÙØ±Ù
+Ø¨ÛØ§ÙØ±ÙØ¯
+Ø¨ÛØ§ÙØ±Û
+Ø¨ÛØ§ÙØ±ÛØ¯
+Ø¨ÛØ§ÙØ±ÛÙ
+Ø¨ÛØ§ÛØ¯
+Ø¨ÛØ§ÛÙ
+Ø¨ÛØ§ÛÙØ¯
+Ø¨ÛØ§ÛÛ
+Ø¨ÛØ§ÛÛØ¯
+Ø¨ÛØ§ÛÛÙ
+Ø¨ÛØ±ÙÙ
+Ø¨ÛØ±ÙÙÙ
+Ø¨ÛØ´
+Ø¨ÛØ´ØªØ±
+Ø¨ÛØ´ØªØ±Û
+Ø¨ÛÙ
+Øª
+ØªØ§
+ØªØ§Ø²Ù
+ØªØ§ÙÙÙÙ
+ØªØ§Ù
+ØªØ§Ú©ÙÙÙ
+ØªØ­Øª
+ØªØ±
+ØªØ±  Ø¨Ø±Ø§Ø³Ø§Ø³
+ØªØ±ÙÙ
+ØªÙØ±ÛØ¨Ø§
+ØªÙÙÛØ­Ø§
+ØªÙØ§Ù
+ØªÙØ§ÙØ§
+ØªÙØ§ÙÙ
+ØªÙÙØ§
+ØªÙ
+ØªÙØ§ÙØ¯
+ØªÙØ§ÙØ³Øª
+ØªÙØ§ÙØ³ØªÙ
+ØªÙØ§ÙØ³ØªÙ
+ØªÙØ§ÙØ³ØªÙØ¯
+ØªÙØ§ÙØ³ØªÙ
+ØªÙØ§ÙØ³ØªÛ
+ØªÙØ§ÙØ³ØªÛÙ
+ØªÙØ§ÙÙ
+ØªÙØ§ÙÙØ¯
+ØªÙØ§ÙÛ
+ØªÙØ§ÙÛØ¯
+ØªÙØ§ÙÛÙ
+ØªÙØ³Ø·
+ØªÙÙÙ
+ØªÙÛÙ
+Ø«Ø§ÙÛØ§
+Ø¬Ø§
+Ø¬Ø§Ù
+Ø¬Ø§ÙÙ
+Ø¬Ø§Û
+Ø¬Ø¯Ø§
+Ø¬Ø¯ÙØ¯
+Ø¬Ø¯ÛØ¯
+Ø¬Ø±ÙØ§Ù
+Ø¬Ø±ÛØ§Ù
+Ø¬Ø²
+Ø¬ÙÙÚ¯ÙØ±Ù
+Ø¬ÙÙÛÙ
+Ø¬ÙØ¹Ø§
+Ø¬ÙØ§Ø­
+Ø¬ÙØª
+Ø­Ø§Ø¶Ø±
+Ø­Ø§Ù
+Ø­Ø§ÙØ§
+Ø­ØªÙØ§
+Ø­ØªÙ
+Ø­ØªÛ
+Ø­Ø¯Ø§Ú©Ø«Ø±
+Ø­Ø¯ÙØ¯Ø§
+Ø­Ø¯ÙØ¯Ù
+Ø­Ù
+Ø®Ø§Ø±Ø¬Ù
+Ø®Ø¨
+Ø®Ø¯ÙØ§Øª
+Ø®ØµÙØµØ§
+Ø®ÙØ§ØµÙ
+Ø®ÙØ§Ø³Øª
+Ø®ÙØ§Ø³ØªÙ
+Ø®ÙØ§Ø³ØªÙ
+Ø®ÙØ§Ø³ØªÙØ¯
+Ø®ÙØ§Ø³ØªÙ
+Ø®ÙØ§Ø³ØªÛ
+Ø®ÙØ§Ø³ØªÛØ¯
+Ø®ÙØ§Ø³ØªÛÙ
+Ø®ÙØ§ÙØ¯
+Ø®ÙØ§ÙÙ
+Ø®ÙØ§ÙÙØ¯
+Ø®ÙØ§ÙÙÙ
+Ø®ÙØ§ÙÛ
+Ø®ÙØ§ÙÛØ¯
+Ø®ÙØ§ÙÛÙ
+Ø®ÙØ¨
+Ø®ÙØ¯
+Ø®ÙØ¯Øª
+Ø®ÙØ¯ØªØ§Ù
+Ø®ÙØ¯Ø´
+Ø®ÙØ¯Ø´Ø§Ù
+Ø®ÙØ¯Ù
+Ø®ÙØ¯ÙØ§Ù
+Ø®ÙØ´Ø¨Ø®ØªØ§ÙÙ
+Ø®ÙÙØ´
+Ø®ÙÛØ´
+Ø®ÙÛØ´ØªÙ
+Ø®ÛØ§Ù
+Ø®ÛØ±
+Ø®ÛÙÛ
+Ø¯Ø§Ø¯
+Ø¯Ø§Ø¯Ù
+Ø¯Ø§Ø¯Ù
+Ø¯Ø§Ø¯ÙØ¯
+Ø¯Ø§Ø¯Ù
+Ø¯Ø§Ø¯Û
+Ø¯Ø§Ø¯ÛØ¯
+Ø¯Ø§Ø¯ÛÙ
+Ø¯Ø§Ø±
+Ø¯Ø§Ø±Ø¯
+Ø¯Ø§Ø±Ù
+Ø¯Ø§Ø±ÙØ¯
+Ø¯Ø§Ø±ÙÙ
+Ø¯Ø§Ø±Û
+Ø¯Ø§Ø±ÛØ¯
+Ø¯Ø§Ø±ÛÙ
+Ø¯Ø§Ø´Øª
+Ø¯Ø§Ø´ØªÙ
+Ø¯Ø§Ø´ØªÙ
+Ø¯Ø§Ø´ØªÙØ¯
+Ø¯Ø§Ø´ØªÙ
+Ø¯Ø§Ø´ØªÛ
+Ø¯Ø§Ø´ØªÛØ¯
+Ø¯Ø§Ø´ØªÛÙ
+Ø¯Ø§ÙØ³Øª
+Ø¯Ø§ÙÙØ¯
+Ø¯Ø§ÛÙ
+Ø¯Ø§ÛÙØ§
+Ø¯Ø±
+Ø¯Ø±Ø¨Ø§Ø±Ù
+Ø¯Ø±ÙØ¬ÙÙØ¹
+Ø¯Ø±ÙÙ
+Ø¯Ø±ÛØº
+Ø¯ÙÛÙØ§
+Ø¯ÙØ¨Ø§ÙÙ
+Ø¯Ù
+Ø¯ÙØ¯
+Ø¯ÙÙ
+Ø¯ÙÙØ¯
+Ø¯ÙÛ
+Ø¯ÙÛØ¯
+Ø¯ÙÛÙ
+Ø¯Ù
+Ø¯ÙØ¨Ø§Ø±Ù
+Ø¯ÙÙ
+Ø¯ÙØ¯Ù
+Ø¯ÙØ±ÙØ²
+Ø¯ÙÚ¯Ø±
+Ø¯ÙÚ¯Ø±Ø§Ù
+Ø¯ÙÚ¯Ø±Ù
+Ø¯ÛØ±
+Ø¯ÛØ±ÙØ²
+Ø¯ÛÚ¯Ø±
+Ø¯ÛÚ¯Ø±Ø§Ù
+Ø¯ÛÚ¯Ø±Û
+Ø±Ø§
+Ø±Ø§Ø­Øª
+Ø±Ø§Ø³Ø§
+Ø±Ø§Ø³ØªÛ
+Ø±Ø§Ù
+Ø±Ø³ÙØ§
+Ø±Ø³ÛØ¯
+Ø±ÙØª
+Ø±ÙØªÙ
+Ø±Ù
+Ø±ÙØ¨
+Ø±ÙØ²
+Ø±ÙØ²Ø§ÙÙ
+Ø±ÙØ²ÙØ§Ù
+Ø±ÙÙ
+Ø±ÙÛ
+Ø±ÙÛÙ
+Ø±ÙØ²Ù
+Ø²ÙØ§Ù
+Ø²ÙØ§ÙÛ
+Ø²ÙÛÙÙ
+Ø²ÙØ¯
+Ø²ÙØ§Ø¯
+Ø²ÙØ±
+Ø²ÙØ±Ø§
+Ø²ÛØ±
+Ø²ÛØ±Ù
+Ø³Ø§Ø¨Ù
+Ø³Ø§Ø®ØªÙ
+Ø³Ø§Ø²Ù
+Ø³Ø§ÙØ§ÙÙ
+Ø³Ø§ÙÛØ§ÙÙ
+Ø³Ø§ÛØ±
+Ø³Ø±Ø§Ø³Ø±
+Ø³Ø±Ø§ÙØ¬Ø§Ù
+Ø³Ø±ÛØ¹Ø§
+Ø³Ø±ÛÙ
+Ø³Ø¹Ù
+Ø³ÙØªÙ
+Ø³ÙÙ
+Ø³ÙÙ
+Ø³ÙÛ
+Ø³ÙÛÙ
+Ø³Ù¾Ø³
+Ø´Ø§Ù
+Ø´Ø§ÙØ¯
+Ø´Ø§ÛØ¯
+Ø´Ø®ØµØ§
+Ø´Ø¯
+Ø´Ø¯Ù
+Ø´Ø¯Ù
+Ø´Ø¯ÙØ¯
+Ø´Ø¯Ù
+Ø´Ø¯Û
+Ø´Ø¯ÛØ¯
+Ø´Ø¯ÛØ¯Ø§
+Ø´Ø¯ÛÙ
+Ø´Ø´
+Ø´Ø´  ÙØ¯Ø§Ø´ØªÙ
+Ø´ÙØ§
+Ø´ÙØ§Ø³Ù
+Ø´ÙØ¯
+Ø´ÙÙ
+Ø´ÙÙØ¯
+Ø´ÙÙØ¯Ù
+Ø´ÙÛ
+Ø´ÙÛØ¯
+Ø´ÙÛÙ
+ØµØ±ÙØ§
+ØµÙØ±Øª
+Ø¶Ø¯ÙÙ
+Ø¶Ø¯ÙÙ
+Ø¶ÙÙ
+Ø·Ø¨Ø¹Ø§
+Ø·Ø¨ÙÙ
+Ø·Ø¨ÛØ¹ØªØ§
+Ø·Ø±Ù
+Ø·Ø±ÙÙ
+Ø·Ø±ÛÙ
+Ø·ÙØ±
+Ø·Ù
+Ø·Û
+Ø¸Ø§ÙØ±Ø§
+Ø¹Ø¯Ù
+Ø¹ÙØ¨Ù
+Ø¹ÙÙØªÙ
+Ø¹ÙÛÙ
+Ø¹ÙØ¯Ø§
+Ø¹ÙØ¯ØªØ§
+Ø¹ÙÙ
+Ø¹ÙÙØ§
+Ø¹ÙÙØ§Ù
+Ø¹ÙÙØ§ÙÙ
+ØºØ§ÙØ¨Ø§
+ØºÙØ±
+ØºÛØ±
+ÙØ±Ø¯Ø§
+ÙØ¹ÙØ§
+ÙÙØ·
+ÙÙØ±
+ÙÙÙ
+ÙØ§Ø¨Ù
+ÙØ¨Ù
+ÙØ¨ÙØ§
+ÙØ¯Ø±Û
+ÙØµØ¯Ù
+ÙØ·Ø¹Ø§
+ÙØ±Ø¯
+ÙØ±Ø¯Ù
+ÙØ±Ø¯Ù
+ÙØ±Ø¯ÙØ¯
+ÙØ±Ø¯Ù
+ÙØ³Ù
+ÙÙ
+ÙÙØªØ±
+ÙÙØ¯
+ÙÙÙ
+ÙÙÙØ¯
+ÙÙÙØ¯
+ÙÙÙÙ
+ÙÙ
+ÙØ§Ø§ÙÙ
+ÙØ·ÙØ§
+ÙØ·ÙØ§Ù
+ÙØ§
+ÙØ§Ù
+ÙØ§ÙÙØ¯
+ÙØ§ÙÙØ¯Ù
+ÙØ¨Ø§Ø¯Ø§
+ÙØªØ§Ø³ÙØ§ÙÙ
+ÙØªØ¹Ø§ÙØ¨Ø§
+ÙØ«Ù
+ÙØ«ÙØ§
+ÙØ«ÙÙ
+ÙØ¬Ø§ÙÛ
+ÙØ¬Ø¯Ø¯Ø§
+ÙØ¬ÙÙØ¹Ø§
+ÙØ®ØªÙÙ
+ÙØ¯Ø§Ù
+ÙØ¯Øª
+ÙØ¯ÙØªÛ
+ÙØ±Ø¯Ù
+ÙØ±Ø³Û
+ÙØ³ØªÙÛÙØ§
+ÙØ³ÙÙØ§
+ÙØ·ÙÛÙØ§
+ÙØ¹ÙÙÙØ§
+ÙÙØ§Ø¨Ù
+ÙÙÚ©Ù
+ÙÙ
+ÙÙØ§Ø±Ø¯
+ÙÙØ±Ø¯
+ÙÙÙØªØ§
+ÙÙ
+ÙÙÙÙØ§Ø±Ø¯
+ÙÙÙÙÙÙ
+ÙÚ¯Ø±
+ÙÛ
+ÙÛ Ø´ÙØ¯
+ÙÛØ§Ù
+ÙÛâØ±Ø³Ø¯
+ÙÛâØ±ÙØ¯
+ÙÛâØ´ÙØ¯
+ÙÛâÚ©ÙÛÙ
+ÙØ§Ø´Ù
+ÙØ§Ù
+ÙØ§Ú¯Ø§Ù
+ÙØ§Ú¯ÙØ§Ù
+ÙØ§Ú¯ÙØ§ÙÛ
+ÙØ¨Ø§ÙØ¯
+ÙØ¨Ø§ÛØ¯
+ÙØ¨ÙØ¯
+ÙØ®Ø³Øª
+ÙØ®Ø³ØªÙÙ
+ÙØ®ÙØ§ÙØ¯
+ÙØ®ÙØ§ÙÙ
+ÙØ®ÙØ§ÙÙØ¯
+ÙØ®ÙØ§ÙÛ
+ÙØ®ÙØ§ÙÛØ¯
+ÙØ®ÙØ§ÙÛÙ
+ÙØ¯Ø§Ø±Ø¯
+ÙØ¯Ø§Ø±Ù
+ÙØ¯Ø§Ø±ÙØ¯
+ÙØ¯Ø§Ø±Û
+ÙØ¯Ø§Ø±ÛØ¯
+ÙØ¯Ø§Ø±ÛÙ
+ÙØ¯Ø§Ø´Øª
+ÙØ¯Ø§Ø´ØªÙ
+ÙØ¯Ø§Ø´ØªÙØ¯
+ÙØ¯Ø§Ø´ØªÙ
+ÙØ¯Ø§Ø´ØªÛ
+ÙØ¯Ø§Ø´ØªÛØ¯
+ÙØ¯Ø§Ø´ØªÛÙ
+ÙØ²Ø¯ÙÙ
+ÙØ²Ø¯Ù
+ÙØ²Ø¯ÛÚ©Ù
+ÙØ³Ø¨ØªØ§
+ÙØ´Ø§Ù
+ÙØ´Ø¯Ù
+ÙØ¸ÙØ±
+ÙØ¸ÛØ±
+ÙÙØ±Ø¯Ù
+ÙÙØ§ÙØ¯
+ÙÙÙ
+ÙÙÛ
+ÙÙÛâØ´ÙØ¯
+ÙÙ
+ÙÙØ§ÛØªØ§
+ÙÙØ¹
+ÙÙØ¹Ù
+ÙÙØ¹Û
+ÙÙØ²
+ÙÙØ³Øª
+ÙÚ¯Ø§Ù
+ÙÛØ²
+ÙÛØ³Øª
+ÙØ§
+ÙØ§Ù
+ÙØ§ÙÙ
+ÙØ§Û
+ÙØ§ÛÛ
+ÙØ¨Ú
+ÙØ±
+ÙØ±ÚÙ
+ÙØ±Ú¯Ø²
+ÙØ²Ø§Ø±
+ÙØ³Øª
+ÙØ³ØªÙ
+ÙØ³ØªÙØ¯
+ÙØ³ØªÙÙ
+ÙØ³ØªÛ
+ÙØ³ØªÛØ¯
+ÙØ³ØªÛÙ
+ÙÙØª
+ÙÙ
+ÙÙØ§Ù
+ÙÙÙ
+ÙÙÙØ§Ø±Ù
+ÙÙÙÙ
+ÙÙÚÙØ§Ù
+ÙÙÚÙÙÙ
+ÙÙÚÙÛÙ
+ÙÙÚÙÙ
+ÙÙÛØ´Ù
+ÙÙÛÙ
+ÙÙÙØ²
+ÙÙÚ¯Ø§Ù
+ÙÙÚ¯Ø§ÙÙ
+ÙÙÚ¯Ø§ÙÛ
+ÙÙÚ
+ÙÛÚ
+ÙÛÚÚ¯Ø§Ù
+Ù
+ÙØ§ÙØ¹Ø§
+ÙØ§ÙØ¹Û
+ÙØ¬ÙØ¯
+ÙØ³Ø·Ù
+ÙØ¶Ø¹
+ÙÙØªÙ
+ÙÙØªÛ
+ÙÙØªÛÚ©Ù
+ÙÙÛ
+ÙÙ
+ÙÚ¯Ù
+ÙÛ
+ÙÛÚÙ
+ÙØ§
+ÙØ§Ø¨Ø¯
+ÙÙ
+ÙÙØ¯ÙÚ¯Ø±
+ÙÙÙ
+ÙÙ
+Ùª
+Ù¾Ø§Ø±Ø³Ø§Ù
+Ù¾Ø§Ø¹ÛÙÙ
+Ù¾Ø³
+Ù¾ÙØ¬
+Ù¾ÙØ´
+Ù¾ÛØ¯Ø§
+Ù¾ÛØ´
+Ù¾ÛØ´Ø§Ù¾ÛØ´
+Ù¾ÛØ´ØªØ±
+Ù¾ÛØ´Ù
+ÚØ±Ø§
+ÚØ·ÙØ±
+ÚÙØ¯Ø±
+ÚÙØ§Ù
+ÚÙØ§ÙÚÙ
+ÚÙØ§ÙÚ©Ù
+ÚÙØ¯
+ÚÙØ¯ÛÙ
+ÚÙÙÙ
+ÚÙÛÙ
+ÚÙ
+ÚÙØ§Ø±
+ÚÙ
+ÚÙÙ
+ÚÙØ²Ù
+ÚÚ¯ÙÙÙ
+ÚÛØ²
+ÚÛØ²Û
+ÚÛØ³Øª
+Ú©Ø§Ø´
+Ú©Ø§ÙÙ
+Ú©Ø§ÙÙØ§
+Ú©ØªØ¨Ø§
+Ú©Ø¬Ø§
+Ú©Ø¬Ø§Ø³Øª
+Ú©Ø¯Ø§Ù
+Ú©Ø±Ø¯
+Ú©Ø±Ø¯Ù
+Ú©Ø±Ø¯Ù
+Ú©Ø±Ø¯ÙØ¯
+Ú©Ø±Ø¯Ù
+Ú©Ø±Ø¯Û
+Ú©Ø±Ø¯ÛØ¯
+Ú©Ø±Ø¯ÛÙ
+Ú©Ø³
+Ú©Ø³Ø§ÙÛ
+Ú©Ø³Û
+Ú©Ù
+Ú©ÙØ§
+Ú©Ù
+Ú©ÙØ§Ú©Ø§Ù
+Ú©ÙØªØ±
+Ú©ÙØªØ±Û
+Ú©ÙÛ
+Ú©Ù
+Ú©ÙØ§Ø±
+Ú©ÙØ§Ø±Ù
+Ú©ÙØ¯
+Ú©ÙÙ
+Ú©ÙÙØ¯
+Ú©ÙÙØ¯Ù
+Ú©ÙÙÙ
+Ú©ÙÙÙÛ
+Ú©ÙÛ
+Ú©ÙÛØ¯
+Ú©ÙÛÙ
+Ú©Ù
+Ú©Ù
+Ú©ÙÛ
+Ú©Û
+Ú¯Ø§Ù
+Ú¯Ø§ÙÛ
+Ú¯Ø°Ø§Ø±Ù
+Ú¯Ø°Ø§Ø´ØªÙ
+Ú¯Ø°Ø´ØªÙ
+Ú¯Ø±Ø¯Ø¯
+Ú¯Ø±ÙØª
+Ú¯Ø±ÙØªÙ
+Ú¯Ø±ÙØªÙ
+Ú¯Ø±ÙØªÙØ¯
+Ú¯Ø±ÙØªÙ
+Ú¯Ø±ÙØªÛ
+Ú¯Ø±ÙØªÛØ¯
+Ú¯Ø±ÙØªÛÙ
+Ú¯Ø±ÙÙÙ
+Ú¯ÙØª
+Ú¯ÙØªÙ
+Ú¯ÙØªÙ
+Ú¯ÙØªÙØ¯
+Ú¯ÙØªÙ
+Ú¯ÙØªÛ
+Ú¯ÙØªÛØ¯
+Ú¯ÙØªÛÙ
+Ú¯Ù
+Ú¯ÙÚ¯Ø§Ù
+Ú¯Ù
+Ú¯ÙÙØ¯
+Ú¯ÙÙÙØ¯
+Ú¯ÙÛØ§
+Ú¯ÙÛØ¯
+Ú¯ÙÛÙ
+Ú¯ÙÛÙØ¯
+Ú¯ÙÛÛ
+Ú¯ÙÛÛØ¯
+Ú¯ÙÛÛÙ
+Ú¯ÙØ±Ø¯
+Ú¯ÙØ±Ù
+Ú¯ÛØ±Ø¯
+Ú¯ÛØ±Ù
+Ú¯ÛØ±ÙØ¯
+Ú¯ÛØ±Û
+Ú¯ÛØ±ÛØ¯
+Ú¯ÛØ±ÛÙ
+Û
+ÛØ§
+ÛØ§Ø¨Ø¯
+ÛØ§Ø¨Ù
+ÛØ§Ø¨ÙØ¯
+ÛØ§Ø¨Û
+ÛØ§Ø¨ÛØ¯
+ÛØ§Ø¨ÛÙ
+ÛØ§ÙØª
+ÛØ§ÙØªÙ
+ÛØ§ÙØªÙ
+ÛØ§ÙØªÙ
+ÛØ§ÙØªÛ
+ÛØ§ÙØªÛØ¯
+ÛØ§ÙØªÛÙ
+ÛØ¹ÙÛ
+ÛÙÛÙØ§
+ÛÙ
+ÛÚ©
+ÛÚ©Û
+Û°
+Û±
+Û²
+Û³
+Û´
+Ûµ
+Û¶
+Û·
+Û¸
+Û¹
\ No newline at end of file
diff --git a/static/stopwords/fi b/static/stopwords/fi
new file mode 100644
index 0000000..84f0006
--- /dev/null
+++ b/static/stopwords/fi
@@ -0,0 +1,847 @@
+aiemmin
+aika
+aikaa
+aikaan
+aikaisemmin
+aikaisin
+aikajen
+aikana
+aikoina
+aikoo
+aikovat
+aina
+ainakaan
+ainakin
+ainoa
+ainoat
+aiomme
+aion
+aiotte
+aist
+aivan
+ajan
+alas
+alemmas
+alkuisin
+alkuun
+alla
+alle
+aloitamme
+aloitan
+aloitat
+aloitatte
+aloitattivat
+aloitettava
+aloitettevaksi
+aloitettu
+aloitimme
+aloitin
+aloitit
+aloititte
+aloittaa
+aloittamatta
+aloitti
+aloittivat
+alta
+aluksi
+alussa
+alusta
+annettavaksi
+annetteva
+annettu
+ansiosta
+antaa
+antamatta
+antoi
+aoua
+apu
+asia
+asiaa
+asian
+asiasta
+asiat
+asioiden
+asioihin
+asioita
+asti
+avuksi
+avulla
+avun
+avutta
+edelle
+edelleen
+edellÃ¤
+edeltÃ¤
+edemmÃ¤s
+edes
+edessÃ¤
+edestÃ¤
+ehkÃ¤
+ei
+eikÃ¤
+eilen
+eivÃ¤t
+eli
+ellei
+elleivÃ¤t
+ellemme
+ellen
+ellet
+ellette
+emme
+en
+enemmÃ¤n
+eniten
+ennen
+ensi
+ensimmÃ¤inen
+ensimmÃ¤iseksi
+ensimmÃ¤isen
+ensimmÃ¤isenÃ¤
+ensimmÃ¤iset
+ensimmÃ¤isiksi
+ensimmÃ¤isinÃ¤
+ensimmÃ¤isiÃ¤
+ensimmÃ¤istÃ¤
+ensin
+entinen
+entisen
+entisiÃ¤
+entisten
+entistÃ¤
+enÃ¤Ã¤
+eri
+erittÃ¤in
+erityisesti
+erÃ¤iden
+erÃ¤s
+erÃ¤Ã¤t
+esi
+esiin
+esillÃ¤
+esimerkiksi
+et
+eteen
+etenkin
+etessa
+ette
+ettei
+ettÃ¤
+haikki
+halua
+haluaa
+haluamatta
+haluamme
+haluan
+haluat
+haluatte
+haluavat
+halunnut
+halusi
+halusimme
+halusin
+halusit
+halusitte
+halusivat
+halutessa
+haluton
+he
+hei
+heidÃ¤n
+heidÃ¤t
+heihin
+heille
+heillÃ¤
+heiltÃ¤
+heissÃ¤
+heistÃ¤
+heitÃ¤
+helposti
+heti
+hetkellÃ¤
+hieman
+hitaasti
+hoikein
+huolimatta
+huomenna
+hyvien
+hyviin
+hyviksi
+hyville
+hyviltÃ¤
+hyvin
+hyvinÃ¤
+hyvissÃ¤
+hyvistÃ¤
+hyviÃ¤
+hyvÃ¤
+hyvÃ¤t
+hyvÃ¤Ã¤
+hÃ¤n
+hÃ¤neen
+hÃ¤nelle
+hÃ¤nellÃ¤
+hÃ¤neltÃ¤
+hÃ¤nen
+hÃ¤nessÃ¤
+hÃ¤nestÃ¤
+hÃ¤net
+hÃ¤ntÃ¤
+ihan
+ilman
+ilmeisesti
+itse
+itsensÃ¤
+itseÃ¤Ã¤n
+ja
+jo
+johon
+joiden
+joihin
+joiksi
+joilla
+joille
+joilta
+joina
+joissa
+joista
+joita
+joka
+jokainen
+jokin
+joko
+joksi
+joku
+jolla
+jolle
+jolloin
+jolta
+jompikumpi
+jona
+jonka
+jonkin
+jonne
+joo
+jopa
+jos
+joskus
+jossa
+josta
+jota
+jotain
+joten
+jotenkin
+jotenkuten
+jotka
+jotta
+jouduimme
+jouduin
+jouduit
+jouduitte
+joudumme
+joudun
+joudutte
+joukkoon
+joukossa
+joukosta
+joutua
+joutui
+joutuivat
+joutumaan
+joutuu
+joutuvat
+juuri
+jÃ¤lkeen
+jÃ¤lleen
+jÃ¤Ã¤
+kahdeksan
+kahdeksannen
+kahdella
+kahdelle
+kahdelta
+kahden
+kahdessa
+kahdesta
+kahta
+kahteen
+kai
+kaiken
+kaikille
+kaikilta
+kaikkea
+kaikki
+kaikkia
+kaikkiaan
+kaikkialla
+kaikkialle
+kaikkialta
+kaikkien
+kaikkin
+kaksi
+kannalta
+kannattaa
+kanssa
+kanssaan
+kanssamme
+kanssani
+kanssanne
+kanssasi
+kauan
+kauemmas
+kaukana
+kautta
+kehen
+keiden
+keihin
+keiksi
+keille
+keillÃ¤
+keiltÃ¤
+keinÃ¤
+keissÃ¤
+keistÃ¤
+keitten
+keittÃ¤
+keitÃ¤
+keneen
+keneksi
+kenelle
+kenellÃ¤
+keneltÃ¤
+kenen
+kenenÃ¤
+kenessÃ¤
+kenestÃ¤
+kenet
+kenettÃ¤
+kennessÃ¤stÃ¤
+kenties
+kerran
+kerta
+kertaa
+keskellÃ¤
+kesken
+keskimÃ¤Ã¤rin
+ketkÃ¤
+ketÃ¤
+kiitos
+kohti
+koko
+kokonaan
+kolmas
+kolme
+kolmen
+kolmesti
+koska
+koskaan
+kovin
+kuin
+kuinka
+kuinkan
+kuitenkaan
+kuitenkin
+kuka
+kukaan
+kukin
+kukka
+kumpainen
+kumpainenkaan
+kumpi
+kumpikaan
+kumpikin
+kun
+kuten
+kuuden
+kuusi
+kuutta
+kylliksi
+kyllÃ¤
+kymmenen
+kyse
+liian
+liki
+lisÃ¤ksi
+lisÃ¤Ã¤
+lla
+luo
+luona
+lÃ¤hekkÃ¤in
+lÃ¤helle
+lÃ¤hellÃ¤
+lÃ¤heltÃ¤
+lÃ¤hemmÃ¤s
+lÃ¤hes
+lÃ¤hinnÃ¤
+lÃ¤htien
+lÃ¤pi
+mahdollisimman
+mahdollista
+me
+meidÃ¤n
+meidÃ¤t
+meihin
+meille
+meillÃ¤
+meiltÃ¤
+meissÃ¤
+meistÃ¤
+meitÃ¤
+melkein
+melko
+menee
+meneet
+menemme
+menen
+menet
+menette
+menevÃ¤t
+meni
+menimme
+menin
+menit
+menivÃ¤t
+mennessÃ¤
+mennyt
+menossa
+mihin
+mikin
+miksi
+mikÃ¤
+mikÃ¤li
+mikÃ¤Ã¤n
+mille
+milloin
+milloinkan
+millÃ¤
+miltÃ¤
+minkÃ¤
+minne
+minua
+minulla
+minulle
+minulta
+minun
+minussa
+minusta
+minut
+minuun
+minÃ¤
+missÃ¤
+mistÃ¤
+miten
+mitkÃ¤
+mitÃ¤
+mitÃ¤Ã¤n
+moi
+molemmat
+mones
+monesti
+monet
+moni
+moniaalla
+moniaalle
+moniaalta
+monta
+muassa
+muiden
+muita
+muka
+mukaan
+mukaansa
+mukana
+mutta
+muu
+muualla
+muualle
+muualta
+muuanne
+muulloin
+muun
+muut
+muuta
+muutama
+muutaman
+muuten
+myÃ¶hemmin
+myÃ¶s
+myÃ¶skin
+myÃ¶skÃ¤Ã¤n
+myÃ¶tÃ¤
+ne
+neljÃ¤
+neljÃ¤n
+neljÃ¤Ã¤
+niiden
+niihin
+niiksi
+niille
+niillÃ¤
+niiltÃ¤
+niin
+niinÃ¤
+niissÃ¤
+niistÃ¤
+niitÃ¤
+noiden
+noihin
+noiksi
+noilla
+noille
+noilta
+noin
+noina
+noissa
+noista
+noita
+nopeammin
+nopeasti
+nopeiten
+nro
+nuo
+nyt
+nÃ¤iden
+nÃ¤ihin
+nÃ¤iksi
+nÃ¤ille
+nÃ¤illÃ¤
+nÃ¤iltÃ¤
+nÃ¤in
+nÃ¤inÃ¤
+nÃ¤issÃ¤
+nÃ¤issÃ¤hin
+nÃ¤issÃ¤lle
+nÃ¤issÃ¤ltÃ¤
+nÃ¤issÃ¤stÃ¤
+nÃ¤istÃ¤
+nÃ¤itÃ¤
+nÃ¤mÃ¤
+ohi
+oikea
+oikealla
+oikein
+ole
+olemme
+olen
+olet
+olette
+oleva
+olevan
+olevat
+oli
+olimme
+olin
+olisi
+olisimme
+olisin
+olisit
+olisitte
+olisivat
+olit
+olitte
+olivat
+olla
+olleet
+olli
+ollut
+oma
+omaa
+omaan
+omaksi
+omalle
+omalta
+oman
+omassa
+omat
+omia
+omien
+omiin
+omiksi
+omille
+omilta
+omissa
+omista
+on
+onkin
+onko
+ovat
+paikoittain
+paitsi
+pakosti
+paljon
+paremmin
+parempi
+parhaillaan
+parhaiten
+perusteella
+perÃ¤ti
+pian
+pieneen
+pieneksi
+pienelle
+pienellÃ¤
+pieneltÃ¤
+pienempi
+pienestÃ¤
+pieni
+pienin
+poikki
+puolesta
+puolestaan
+pÃ¤Ã¤lle
+runsaasti
+saakka
+sadam
+sama
+samaa
+samaan
+samalla
+samallalta
+samallassa
+samallasta
+saman
+samat
+samoin
+sata
+sataa
+satojen
+se
+seitsemÃ¤n
+sekÃ¤
+sen
+seuraavat
+siellÃ¤
+sieltÃ¤
+siihen
+siinÃ¤
+siis
+siitÃ¤
+sijaan
+siksi
+sille
+silloin
+sillÃ¤
+silti
+siltÃ¤
+sinne
+sinua
+sinulla
+sinulle
+sinulta
+sinun
+sinussa
+sinusta
+sinut
+sinuun
+sinÃ¤
+sisÃ¤kkÃ¤in
+sisÃ¤llÃ¤
+siten
+sitten
+sitÃ¤
+ssa
+sta
+suoraan
+suuntaan
+suuren
+suuret
+suuri
+suuria
+suurin
+suurten
+taa
+taas
+taemmas
+tahansa
+tai
+takaa
+takaisin
+takana
+takia
+tallÃ¤
+tapauksessa
+tarpeeksi
+tavalla
+tavoitteena
+te
+teidÃ¤n
+teidÃ¤t
+teihin
+teille
+teillÃ¤
+teiltÃ¤
+teissÃ¤
+teistÃ¤
+teitÃ¤
+tietysti
+todella
+toinen
+toisaalla
+toisaalle
+toisaalta
+toiseen
+toiseksi
+toisella
+toiselle
+toiselta
+toisemme
+toisen
+toisensa
+toisessa
+toisesta
+toista
+toistaiseksi
+toki
+tosin
+tuhannen
+tuhat
+tule
+tulee
+tulemme
+tulen
+tulet
+tulette
+tulevat
+tulimme
+tulin
+tulisi
+tulisimme
+tulisin
+tulisit
+tulisitte
+tulisivat
+tulit
+tulitte
+tulivat
+tulla
+tulleet
+tullut
+tuntuu
+tuo
+tuohon
+tuoksi
+tuolla
+tuolle
+tuolloin
+tuolta
+tuon
+tuona
+tuonne
+tuossa
+tuosta
+tuota
+tuotÃ¤
+tuskin
+tykÃ¶
+tÃ¤hÃ¤n
+tÃ¤ksi
+tÃ¤lle
+tÃ¤llÃ¤
+tÃ¤llÃ¶in
+tÃ¤ltÃ¤
+tÃ¤mÃ¤
+tÃ¤mÃ¤n
+tÃ¤nne
+tÃ¤nÃ¤
+tÃ¤nÃ¤Ã¤n
+tÃ¤ssÃ¤
+tÃ¤stÃ¤
+tÃ¤ten
+tÃ¤tÃ¤
+tÃ¤ysin
+tÃ¤ytyvÃ¤t
+tÃ¤ytyy
+tÃ¤Ã¤llÃ¤
+tÃ¤Ã¤ltÃ¤
+ulkopuolella
+usea
+useasti
+useimmiten
+usein
+useita
+uudeksi
+uudelleen
+uuden
+uudet
+uusi
+uusia
+uusien
+uusinta
+uuteen
+uutta
+vaan
+vahemmÃ¤n
+vai
+vaiheessa
+vaikea
+vaikean
+vaikeat
+vaikeilla
+vaikeille
+vaikeilta
+vaikeissa
+vaikeista
+vaikka
+vain
+varmasti
+varsin
+varsinkin
+varten
+vasen
+vasenmalla
+vasta
+vastaan
+vastakkain
+vastan
+verran
+vielÃ¤
+vierekkÃ¤in
+vieressÃ¤
+vieri
+viiden
+viime
+viimeinen
+viimeisen
+viimeksi
+viisi
+voi
+voidaan
+voimme
+voin
+voisi
+voit
+voitte
+voivat
+vuoden
+vuoksi
+vuosi
+vuosien
+vuosina
+vuotta
+vÃ¤hemmÃ¤n
+vÃ¤hintÃ¤Ã¤n
+vÃ¤hiten
+vÃ¤hÃ¤n
+vÃ¤lillÃ¤
+yhdeksÃ¤n
+yhden
+yhdessÃ¤
+yhteen
+yhteensÃ¤
+yhteydessÃ¤
+yhteyteen
+yhtÃ¤
+yhtÃ¤Ã¤lle
+yhtÃ¤Ã¤llÃ¤
+yhtÃ¤Ã¤ltÃ¤
+yhtÃ¤Ã¤n
+yhÃ¤
+yksi
+yksin
+yksittÃ¤in
+yleensÃ¤
+ylemmÃ¤s
+yli
+ylÃ¶s
+ympÃ¤ri
+Ã¤lkÃ¶Ã¶n
+Ã¤lÃ¤
\ No newline at end of file
diff --git a/static/stopwords/fr b/static/stopwords/fr
new file mode 100644
index 0000000..18dd261
--- /dev/null
+++ b/static/stopwords/fr
@@ -0,0 +1,691 @@
+a
+abord
+absolument
+afin
+ah
+ai
+aie
+aient
+aies
+ailleurs
+ainsi
+ait
+allaient
+allo
+allons
+allÃ´
+alors
+anterieur
+anterieure
+anterieures
+apres
+aprÃ¨s
+as
+assez
+attendu
+au
+aucun
+aucune
+aucuns
+aujourd
+aujourd'hui
+aupres
+auquel
+aura
+aurai
+auraient
+aurais
+aurait
+auras
+aurez
+auriez
+aurions
+aurons
+auront
+aussi
+autant
+autre
+autrefois
+autrement
+autres
+autrui
+aux
+auxquelles
+auxquels
+avaient
+avais
+avait
+avant
+avec
+avez
+aviez
+avions
+avoir
+avons
+ayant
+ayez
+ayons
+b
+bah
+bas
+basee
+bat
+beau
+beaucoup
+bien
+bigre
+bon
+boum
+bravo
+brrr
+c
+car
+ce
+ceci
+cela
+celle
+celle-ci
+celle-lÃ 
+celles
+celles-ci
+celles-lÃ 
+celui
+celui-ci
+celui-lÃ 
+celÃ 
+cent
+cependant
+certain
+certaine
+certaines
+certains
+certes
+ces
+cet
+cette
+ceux
+ceux-ci
+ceux-lÃ 
+chacun
+chacune
+chaque
+cher
+chers
+chez
+chiche
+chut
+chÃ¨re
+chÃ¨res
+ci
+cinq
+cinquantaine
+cinquante
+cinquantiÃ¨me
+cinquiÃ¨me
+clac
+clic
+combien
+comme
+comment
+comparable
+comparables
+compris
+concernant
+contre
+couic
+crac
+d
+da
+dans
+de
+debout
+dedans
+dehors
+deja
+delÃ 
+depuis
+dernier
+derniere
+derriere
+derriÃ¨re
+des
+desormais
+desquelles
+desquels
+dessous
+dessus
+deux
+deuxiÃ¨me
+deuxiÃ¨mement
+devant
+devers
+devra
+devrait
+different
+differentes
+differents
+diffÃ©rent
+diffÃ©rente
+diffÃ©rentes
+diffÃ©rents
+dire
+directe
+directement
+dit
+dite
+dits
+divers
+diverse
+diverses
+dix
+dix-huit
+dix-neuf
+dix-sept
+dixiÃ¨me
+doit
+doivent
+donc
+dont
+dos
+douze
+douziÃ¨me
+dring
+droite
+du
+duquel
+durant
+dÃ¨s
+dÃ©but
+dÃ©sormais
+e
+effet
+egale
+egalement
+egales
+eh
+elle
+elle-mÃªme
+elles
+elles-mÃªmes
+en
+encore
+enfin
+entre
+envers
+environ
+es
+essai
+est
+et
+etant
+etc
+etre
+eu
+eue
+eues
+euh
+eurent
+eus
+eusse
+eussent
+eusses
+eussiez
+eussions
+eut
+eux
+eux-mÃªmes
+exactement
+exceptÃ©
+extenso
+exterieur
+eÃ»mes
+eÃ»t
+eÃ»tes
+f
+fais
+faisaient
+faisant
+fait
+faites
+faÃ§on
+feront
+fi
+flac
+floc
+fois
+font
+force
+furent
+fus
+fusse
+fussent
+fusses
+fussiez
+fussions
+fut
+fÃ»mes
+fÃ»t
+fÃ»tes
+g
+gens
+h
+ha
+haut
+hein
+hem
+hep
+hi
+ho
+holÃ 
+hop
+hormis
+hors
+hou
+houp
+hue
+hui
+huit
+huitiÃ¨me
+hum
+hurrah
+hÃ©
+hÃ©las
+i
+ici
+il
+ils
+importe
+j
+je
+jusqu
+jusque
+juste
+k
+l
+la
+laisser
+laquelle
+las
+le
+lequel
+les
+lesquelles
+lesquels
+leur
+leurs
+longtemps
+lors
+lorsque
+lui
+lui-meme
+lui-mÃªme
+lÃ 
+lÃ¨s
+m
+ma
+maint
+maintenant
+mais
+malgre
+malgrÃ©
+maximale
+me
+meme
+memes
+merci
+mes
+mien
+mienne
+miennes
+miens
+mille
+mince
+mine
+minimale
+moi
+moi-meme
+moi-mÃªme
+moindres
+moins
+mon
+mot
+moyennant
+multiple
+multiples
+mÃªme
+mÃªmes
+n
+na
+naturel
+naturelle
+naturelles
+ne
+neanmoins
+necessaire
+necessairement
+neuf
+neuviÃ¨me
+ni
+nombreuses
+nombreux
+nommÃ©s
+non
+nos
+notamment
+notre
+nous
+nous-mÃªmes
+nouveau
+nouveaux
+nul
+nÃ©anmoins
+nÃ´tre
+nÃ´tres
+o
+oh
+ohÃ©
+ollÃ©
+olÃ©
+on
+ont
+onze
+onziÃ¨me
+ore
+ou
+ouf
+ouias
+oust
+ouste
+outre
+ouvert
+ouverte
+ouverts
+o|
+oÃ¹
+p
+paf
+pan
+par
+parce
+parfois
+parle
+parlent
+parler
+parmi
+parole
+parseme
+partant
+particulier
+particuliÃ¨re
+particuliÃ¨rement
+pas
+passÃ©
+pendant
+pense
+permet
+personne
+personnes
+peu
+peut
+peuvent
+peux
+pff
+pfft
+pfut
+pif
+pire
+piÃ¨ce
+plein
+plouf
+plupart
+plus
+plusieurs
+plutÃ´t
+possessif
+possessifs
+possible
+possibles
+pouah
+pour
+pourquoi
+pourrais
+pourrait
+pouvait
+prealable
+precisement
+premier
+premiÃ¨re
+premiÃ¨rement
+pres
+probable
+probante
+procedant
+proche
+prÃ¨s
+psitt
+pu
+puis
+puisque
+pur
+pure
+q
+qu
+quand
+quant
+quant-Ã -soi
+quanta
+quarante
+quatorze
+quatre
+quatre-vingt
+quatriÃ¨me
+quatriÃ¨mement
+que
+quel
+quelconque
+quelle
+quelles
+quelqu'un
+quelque
+quelques
+quels
+qui
+quiconque
+quinze
+quoi
+quoique
+r
+rare
+rarement
+rares
+relative
+relativement
+remarquable
+rend
+rendre
+restant
+reste
+restent
+restrictif
+retour
+revoici
+revoilÃ 
+rien
+s
+sa
+sacrebleu
+sait
+sans
+sapristi
+sauf
+se
+sein
+seize
+selon
+semblable
+semblaient
+semble
+semblent
+sent
+sept
+septiÃ¨me
+sera
+serai
+seraient
+serais
+serait
+seras
+serez
+seriez
+serions
+serons
+seront
+ses
+seul
+seule
+seulement
+si
+sien
+sienne
+siennes
+siens
+sinon
+six
+sixiÃ¨me
+soi
+soi-mÃªme
+soient
+sois
+soit
+soixante
+sommes
+son
+sont
+sous
+souvent
+soyez
+soyons
+specifique
+specifiques
+speculatif
+stop
+strictement
+subtiles
+suffisant
+suffisante
+suffit
+suis
+suit
+suivant
+suivante
+suivantes
+suivants
+suivre
+sujet
+superpose
+sur
+surtout
+t
+ta
+tac
+tandis
+tant
+tardive
+te
+tel
+telle
+tellement
+telles
+tels
+tenant
+tend
+tenir
+tente
+tes
+tic
+tien
+tienne
+tiennes
+tiens
+toc
+toi
+toi-mÃªme
+ton
+touchant
+toujours
+tous
+tout
+toute
+toutefois
+toutes
+treize
+trente
+tres
+trois
+troisiÃ¨me
+troisiÃ¨mement
+trop
+trÃ¨s
+tsoin
+tsouin
+tu
+tÃ©
+u
+un
+une
+unes
+uniformement
+unique
+uniques
+uns
+v
+va
+vais
+valeur
+vas
+vers
+via
+vif
+vifs
+vingt
+vivat
+vive
+vives
+vlan
+voici
+voie
+voient
+voilÃ 
+voire
+vont
+vos
+votre
+vous
+vous-mÃªmes
+vu
+vÃ©
+vÃ´tre
+vÃ´tres
+w
+x
+y
+z
+zut
+Ã 
+Ã¢
+Ã§a
+Ã¨s
+Ã©taient
+Ã©tais
+Ã©tait
+Ã©tant
+Ã©tat
+Ã©tiez
+Ã©tions
+Ã©tÃ©
+Ã©tÃ©e
+Ã©tÃ©es
+Ã©tÃ©s
+Ãªtes
+Ãªtre
+Ã´
\ No newline at end of file
diff --git a/static/stopwords/ga b/static/stopwords/ga
new file mode 100644
index 0000000..5ad466d
--- /dev/null
+++ b/static/stopwords/ga
@@ -0,0 +1,109 @@
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhÃºr
+caoga
+ceathair
+ceathrar
+chomh
+chtÃ³
+chuig
+chun
+cois
+cÃ©ad
+cÃºig
+cÃºigear
+d'
+daichead
+dar
+de
+deich
+deichniÃºr
+den
+dhÃ¡
+do
+don
+dtÃ­
+dÃ¡
+dÃ¡r
+dÃ³
+faoi
+faoin
+faoina
+faoinÃ¡r
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inÃ¡r
+is
+le
+leis
+lena
+lenÃ¡r
+m'
+mar
+mo
+mÃ©
+na
+nach
+naoi
+naonÃºr
+nÃ¡
+nÃ­
+nÃ­or
+nÃ³
+nÃ³cha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtÃ³
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sÃ©
+sÃ­
+tar
+thar
+thÃº
+triÃºr
+trÃ­
+trÃ­na
+trÃ­nÃ¡r
+trÃ­ocha
+tÃº
+um
+Ã¡r
+Ã©
+Ã©is
+Ã­
+Ã³
+Ã³n
+Ã³na
+Ã³nÃ¡r
\ No newline at end of file
diff --git a/static/stopwords/gl b/static/stopwords/gl
new file mode 100644
index 0000000..c5baac0
--- /dev/null
+++ b/static/stopwords/gl
@@ -0,0 +1,160 @@
+a
+alÃ­
+ao
+aos
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquÃ­
+as
+asÃ­
+aÃ­nda
+ben
+cando
+che
+co
+coa
+coas
+comigo
+con
+connosco
+contigo
+convosco
+cos
+cun
+cunha
+cunhas
+cuns
+da
+dalgunha
+dalgunhas
+dalgÃºn
+dalgÃºns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+dunha
+dunhas
+duns
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estaba
+estar
+este
+estes
+estiven
+estou
+estÃ¡
+estÃ¡n
+eu
+facer
+foi
+foron
+fun
+habÃ­a
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miÃ±a
+miÃ±as
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nun
+nunha
+nunhas
+nuns
+nÃ³s
+o
+os
+ou
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senÃ³n
+ser
+seu
+seus
+sexa
+sido
+sobre
+sÃºa
+sÃºas
+tamÃ©n
+tan
+te
+ten
+ter
+teu
+teus
+teÃ±en
+teÃ±o
+ti
+tido
+tiven
+tiÃ±a
+tÃºa
+tÃºas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vÃ³s
+Ã¡
+Ã©
+Ã³
+Ã³s
\ No newline at end of file
diff --git a/static/stopwords/gu b/static/stopwords/gu
new file mode 100644
index 0000000..c68fd56
--- /dev/null
+++ b/static/stopwords/gu
@@ -0,0 +1,224 @@
+àªàªàªà«
+àªàªàª¦àª°
+àªàª¥àªµàª¾
+àªàª¨à«
+àªàª®àª¨à«
+àªàª®àª¾àª°à«àª
+àªàª®à«
+àªàª¹à«àª
+àª
+àªàªàª³
+àªàª¥à«
+àªàª¨à«àª
+àªàª¨à«
+àªàªªàª£àª¨à«
+àªàªªàª£à«àª
+àªàªªàª£à«
+àªàªªà«
+àªàª°
+àªàªµà«
+àªàªµà«
+àªàªªàª°
+àªàª­àª¾
+àªàªàªà«
+àªàª­à«àª
+àª
+àªàª
+àªàª¨
+àªàª¨àª¾
+àªàª¨àª¾àª
+àªàª¨à«
+àªàª¨à«àª
+àªàª¨à«
+àªàª¨à«
+àªàª®
+àªàªµàª¾
+àªàªµàª¾àª
+àªàªµà«
+àªàªµà«àª
+àªàªµà«
+àªàªà«àª
+àªàªàªàª
+àªàª
+àªàª¯à«àª
+àªàª¯à«
+àªàª°àª¤àª¾àª
+àªàª°àªµà«àª
+àªàª°à«
+àªàª°à«àª
+àªàª°à«àª
+àªàª°à«
+àªàª°à«àª²à«àª
+àªàª°à«àª¯àª¾
+àªàª°à«àª¯àª¾àª
+àªàª°à«àª¯à«àª
+àªàª°à«àª¯à«
+àªàª¾àªàª
+àªà«
+àªà«àªàª²à«àª
+àªà«àª®
+àªà«àªµà«
+àªà«àªµà«àª
+àªà«àª
+àªà«àªàª
+àªà«àª£
+àªà«àª£à«
+àªà«àª¨à«
+àªà«àª¯àª¾àª
+àªà«àª¯àª¾àª°à«
+àªà«àª¬
+àªàª
+àªàª¯àª¾
+àªàª¯àª¾àª
+àªàª¯à«àª
+àªàª¯à«
+àªàª£à«àª
+àª
+àªàª¤àª¾àª
+àªà«àª
+àªà«àª
+àªà«
+àªà«àª
+àªà«
+àª
+àªàª¾àª¯
+àªà«
+àªà«
+àªà«àªàª²à«àª
+àªà«àª¨à«
+àªà«àª®
+àªà«àªµà«
+àªà«àªµà«àª
+àªà«àªµà«
+àªà«
+àªà«àªàª
+àªà«àª¯àª¾àª
+àªà«àª¯àª¾àª°à«
+àªàª¾àªà«àª
+àª¤àª¨à«
+àª¤àª®àª¨à«
+àª¤àª®àª¾àª°à«àª
+àª¤àª®à«
+àª¤àª¾
+àª¤àª¾àª°àª¾àª¥à«
+àª¤àª¾àª°àª¾àª®àª¾àª
+àª¤àª¾àª°à«àª
+àª¤à«àª
+àª¤à«
+àª¤à«àª
+àª¤à«àª
+àª¤à«àª£à«
+àª¤à«àª¥à«
+àª¤à«àª¨àª¾
+àª¤à«àª¨à«
+àª¤à«àª¨à«àª
+àª¤à«àª¨à«
+àª¤à«àª®
+àª¤à«àª®àª¨à«àª
+àª¤à«àª®àª¨à«
+àª¤à«àªµà«
+àª¤à«àªµà«àª
+àª¤à«
+àª¤à«àª¯àª¾àª
+àª¤à«àª¯àª¾àª°à«
+àª¥àª
+àª¥àª
+àª¥àªàª
+àª¥àª¤àª¾
+àª¥àª¤àª¾àª
+àª¥àª¤à«
+àª¥àª¤à«àª
+àª¥àª¤à«
+àª¥àª¯àª¾
+àª¥àª¯àª¾àª
+àª¥àª¯à«àª
+àª¥àª¯à«àª²à«àª
+àª¥àª¯à«
+àª¥àªµà«àª
+àª¥àª¾àªàª
+àª¥àª¾àª
+àª¥àª¾àª¯
+àª¥à«
+àª¥à«àª¡à«àª
+àª¦àª°à«àª
+àª¨
+àª¨àª
+àª¨àª.
+àª¨àª¥à«
+àª¨àª¹àª¿
+àª¨àª¹à«
+àª¨àª¹à«àª
+àª¨àª¾
+àª¨à«
+àª¨à«àªà«
+àª¨à«àª
+àª¨à«
+àª¨à«
+àªªàªà«
+àªªàª£
+àªªàª°
+àªªàª°àªàª¤à«
+àªªàª¹à«àª²àª¾àª
+àªªàª¾àªàª³
+àªªàª¾àª¸à«
+àªªà«àª¤àª¾àª¨à«àª
+àªªà«àª°àª¤à«àª¯à«àª
+àª«àªà«àª¤
+àª«àª°à«
+àª«àª°à«àª¥à«
+àª¬àªàª¨à«
+àª¬àª§àª¾
+àª¬àª§à«àª
+àª¬àª¨à«
+àª¬àª¹àª¾àª°
+àª¬àª¹à«
+àª¬àª¾àª¦
+àª¬à«
+àª®àª¨à«
+àª®àª¾
+àª®àª¾àª
+àª®àª¾àªà«
+àª®àª¾àª¤à«àª°
+àª®àª¾àª°à«àª
+àª®à«
+àª®à«àªàªµà«àª
+àª®à«àªà«
+àª®à«àªà«àª¯àª¾
+àª®à«àªà«àª¯àª¾àª
+àª®à«àªà«àª¯à«àª
+àª®à«àª
+àª°àª¹à«
+àª°àª¹à«
+àª°àª¹à«àªµà«àª
+àª°àª¹à«àª¯àª¾
+àª°àª¹à«àª¯àª¾àª
+àª°àª¹à«àª¯à«
+àª°à«àª¤à«
+àª°à«.
+àª°à«àª¾
+àª²à«àª¤àª¾
+àª²à«àª¤à«àª
+àª²à«àªµàª¾
+àªµàªà«àª°à«
+àªµàª§à«
+àª¶àªà«
+àª¶àª¾
+àª¶à«àª
+àª¸àª°àªà«àª
+àª¸àª¾àª®à«
+àª¸à«àª§à«
+àª¹àª¤àª¾
+àª¹àª¤àª¾àª
+àª¹àª¤à«
+àª¹àª¤à«àª
+àª¹àªµà«
+àª¹àª¶à«
+àª¹àª¶à«
+àª¹àª¾
+àª¹à«àª
+àª¹à«
+àª¹à«àª
+àª¹à«àªàª¶
+àª¹à«àªàª¶à«àª
+àª¹à«àª¯
+àª¹à«àªµàª¾
\ No newline at end of file
diff --git a/static/stopwords/ha b/static/stopwords/ha
new file mode 100644
index 0000000..dce823d
--- /dev/null
+++ b/static/stopwords/ha
@@ -0,0 +1,39 @@
+a
+amma
+ba
+ban
+ce
+cikin
+da
+don
+ga
+in
+ina
+ita
+ji
+ka
+ko
+kuma
+lokacin
+ma
+mai
+na
+ne
+ni
+sai
+shi
+su
+suka
+sun
+ta
+tafi
+take
+tana
+wani
+wannan
+wata
+ya
+yake
+yana
+yi
+za
\ No newline at end of file
diff --git a/static/stopwords/he b/static/stopwords/he
new file mode 100644
index 0000000..5f345f3
--- /dev/null
+++ b/static/stopwords/he
@@ -0,0 +1,194 @@
+×××
+××
+××××
+×××ª×
+×××ª×
+×××ª×
+×××ª×
+×××ª×
+×××ª×
+×××ª× ×
+××
+×××¨
+×××¨××ª
+×××¨×
+×××¨×××
+×××¨××
+×××¨×ª
+××
+××××
+×××
+×××
+×××¤×
+×××ª×
+×××ª×
+×××ª×
+×××ª×
+×××ª××
+×××ª××
+×××ª×
+×××ª×
+×××ª× ×
+××
+××
+×××
+×××
+××
+×× ×× ×
+×× ×
+××¡
+××£
+××¦×
+××©×¨
+××ª
+××ª×
+××ª××
+××ª××
+××ª×
+××ª×
+×××××××××
+××××¦×¢
+××××¦×¢××ª
+××××
+×××
+×××
+×××××
+×××§×××©××
+××¨×
+××©×××
+××©×¢××©
+××ª××
+××
+××¨×
+×××
+×××
+×××
+××××
+×××ª×
+×××ª×
+××
+××
+×× ×
+××¡××××©×××××
+××¨×
+×××××
+×××ª
+×××ª
+××
+×××ª
+××××
+××××
+×××××
+×××ª×¨×××
+××××
+×××××
+××××××ª
+××××××
+×××
+××××
+××××
+××©
+×××
+×××©×¨
+××××
+××××
+×××
+××
+×××¦×
+××
+×××
+××
+×××
+×××
+××
+××¤×
+××©
+××
+×××
+××××××ª××××ª
+×××
+××××
+××
+×××××ª
+×××
+×××
+××
+××
+×××
+×××
+×××
+××××
+×××¢××
+×××§×××©××
+×××¨××ª
+×× ×
+××¢××¨
+××¢×××
+××¤×××
+××¤× ×
+×××
+×××××¨×
+××××××¡×××
+××××
+××××¤×
+××××
+×××¢×
+××××¢
+××
+×××××
+×××
+××××¥
+××
+××××
+××××××
+××××
+××
+×× ××
+××¡×××
+××¢×
+××¢×××
+××¢×
+××¦×
+××§××××
+××ª××ª
+××ª×
+× ××
+× ××¨
+× ×
+×¢×
+×¢×
+×¢×
+×¢××
+×¢×××
+×¢××××
+×¢××××
+×¢×××
+×¢×××
+×¢××××
+×¢××× ×
+×¢×
+×¢×¦××
+×¢×¦×××
+×¢×¦×××
+×¢×¦××
+×¢×¦××
+×¢×¦××
+×¢×¦××
+×¢×¦×× ×
+×¤×
+×¨×§
+×©××
+×©×
+×©××
+×©×××
+×©×××
+×©××
+×©××
+×©××
+×©×××
+×©×××
+×©×××
+×©×× ×
+×©×
+×ª×××
+×ª××ª
\ No newline at end of file
diff --git a/static/stopwords/hi b/static/stopwords/hi
new file mode 100644
index 0000000..b4b2078
--- /dev/null
+++ b/static/stopwords/hi
@@ -0,0 +1,225 @@
+à¤à¤à¤¦à¤°
+à¤à¤¤
+à¤à¤¦à¤¿
+à¤à¤ª
+à¤à¤ªà¤¨à¤¾
+à¤à¤ªà¤¨à¤¿
+à¤à¤ªà¤¨à¥
+à¤à¤ªà¤¨à¥
+à¤à¤­à¤¿
+à¤à¤­à¥
+à¤à¤¦à¤¿
+à¤à¤ª
+à¤à¤à¤¹à¤¿à¤
+à¤à¤à¤¹à¥à¤
+à¤à¤à¤¹à¥à¤
+à¤à¤¤à¤¯à¤¾à¤¦à¤¿
+à¤à¤¤à¥à¤¯à¤¾à¤¦à¤¿
+à¤à¤¨
+à¤à¤¨à¤à¤¾
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¸
+à¤à¤¸à¤à¤¾
+à¤à¤¸à¤à¤¿
+à¤à¤¸à¤à¥
+à¤à¤¸à¤à¥
+à¤à¤¸à¤®à¥à¤
+à¤à¤¸à¤¿
+à¤à¤¸à¥
+à¤à¤¸à¥
+à¤à¤à¤¹à¤¿à¤
+à¤à¤à¤¹à¥à¤
+à¤à¤à¤¹à¥à¤
+à¤à¤¨
+à¤à¤¨à¤à¤¾
+à¤à¤¨à¤à¤¿
+à¤à¤¨à¤à¥
+à¤à¤¨à¤à¥
+à¤à¤¨à¤à¥
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¸
+à¤à¤¸à¤à¥
+à¤à¤¸à¤¿
+à¤à¤¸à¥
+à¤à¤¸à¥
+à¤à¤
+à¤à¤µà¤
+à¤à¤¸
+à¤à¤¸à¥
+à¤à¤¸à¥
+à¤à¤°
+à¤à¤°
+à¤à¤
+à¤à¤
+à¤à¤°
+à¤à¤°à¤¤à¤¾
+à¤à¤°à¤¤à¥
+à¤à¤°à¤¨à¤¾
+à¤à¤°à¤¨à¥
+à¤à¤°à¥à¤
+à¤à¤¹à¤¤à¥
+à¤à¤¹à¤¾
+à¤à¤¾
+à¤à¤¾à¤«à¤¿
+à¤à¤¾à¥à¥
+à¤à¤¿
+à¤à¤¿à¤à¤¹à¥à¤
+à¤à¤¿à¤à¤¹à¥à¤
+à¤à¤¿à¤¤à¤¨à¤¾
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¯à¤¾
+à¤à¤¿à¤°
+à¤à¤¿à¤¸
+à¤à¤¿à¤¸à¤¿
+à¤à¤¿à¤¸à¥
+à¤à¤¿à¤¸à¥
+à¤à¥
+à¤à¥à¤
+à¤à¥à¤²
+à¤à¥
+à¤à¥
+à¤à¥à¤
+à¤à¥à¤
+à¤à¥à¤¨
+à¤à¥à¤¨à¤¸à¤¾
+à¤à¥à¤¨
+à¤à¥à¤¨à¤¸à¤¾
+à¤à¤¯à¤¾
+à¤à¤°
+à¤à¤¬
+à¤à¤¹à¤¾à¤
+à¤à¤¹à¤¾à¤
+à¤à¤¾
+à¤à¤¿à¤à¤¹à¥à¤
+à¤à¤¿à¤à¤¹à¥à¤
+à¤à¤¿à¤¤à¤¨à¤¾
+à¤à¤¿à¤§à¤°
+à¤à¤¿à¤¨
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¸
+à¤à¤¿à¤¸à¥
+à¤à¥à¤§à¤°
+à¤à¥à¤¸à¤¾
+à¤à¥à¤¸à¥
+à¤à¥à¤¸à¤¾
+à¤à¥à¤¸à¥
+à¤à¥
+à¤¤à¤
+à¤¤à¤¬
+à¤¤à¤°à¤¹
+à¤¤à¤¿à¤à¤¹à¥à¤
+à¤¤à¤¿à¤à¤¹à¥à¤
+à¤¤à¤¿à¤¨
+à¤¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤¤à¤¿à¤¸
+à¤¤à¤¿à¤¸à¥
+à¤¤à¥
+à¤¥à¤¾
+à¤¥à¤¿
+à¤¥à¥
+à¤¥à¥
+à¤¦à¤¬à¤¾à¤°à¤¾
+à¤¦à¤µà¤¾à¤°à¤¾
+à¤¦à¤¿à¤¯à¤¾
+à¤¦à¥à¤¸à¤°à¤¾
+à¤¦à¥à¤¸à¤°à¥
+à¤¦à¥à¤¸à¤°à¥
+à¤¦à¥
+à¤¦à¥à¤µà¤¾à¤°à¤¾
+à¤¨
+à¤¨à¤¹à¤¿à¤
+à¤¨à¤¹à¥à¤
+à¤¨à¤¾
+à¤¨à¤¿à¤à¥
+à¤¨à¤¿à¤¹à¤¾à¤¯à¤¤
+à¤¨à¥à¤à¥
+à¤¨à¥
+à¤ªà¤°
+à¤ªà¤¹à¤²à¥
+à¤ªà¥à¤°à¤¾
+à¤ªà¥à¤°à¤¾
+à¤ªà¥
+à¤«à¤¿à¤°
+à¤¬à¤¨à¤¿
+à¤¬à¤¨à¥
+à¤¬à¤¹à¤¿
+à¤¬à¤¹à¥
+à¤¬à¤¹à¥à¤¤
+à¤¬à¤¾à¤¦
+à¤¬à¤¾à¤²à¤¾
+à¤¬à¤¿à¤²à¤à¥à¤²
+à¤­à¤¿
+à¤­à¤¿à¤¤à¤°
+à¤­à¥
+à¤­à¥à¤¤à¤°
+à¤®à¤à¤°
+à¤®à¤¾à¤¨à¥
+à¤®à¥
+à¤®à¥à¤
+à¤¯à¤¦à¤¿
+à¤¯à¤¹
+à¤¯à¤¹à¤¾à¤
+à¤¯à¤¹à¤¾à¤
+à¤¯à¤¹à¤¿
+à¤¯à¤¹à¥
+à¤¯à¤¾
+à¤¯à¤¿à¤¹
+à¤¯à¥
+à¤°à¤à¥à¤
+à¤°à¤µà¤¾à¤¸à¤¾
+à¤°à¤¹à¤¾
+à¤°à¤¹à¥
+à¤±à¥à¤µà¤¾à¤¸à¤¾
+à¤²à¤¿à¤
+à¤²à¤¿à¤¯à¥
+à¤²à¥à¤à¤¿à¤¨
+à¤µ
+à¤µà¤à¥à¤°à¤¹
+à¤µà¤°à¤
+à¤µà¤°à¥à¤
+à¤µà¤¹
+à¤µà¤¹à¤¾à¤
+à¤µà¤¹à¤¾à¤
+à¤µà¤¹à¤¿à¤
+à¤µà¤¹à¥à¤
+à¤µà¤¾à¤²à¥
+à¤µà¥à¤¹
+à¤µà¥
+à¤µà¥à¥à¤°à¤¹
+à¤¸à¤à¤
+à¤¸à¤à¤¤à¤¾
+à¤¸à¤à¤¤à¥
+à¤¸à¤¬à¤¸à¥
+à¤¸à¤­à¤¿
+à¤¸à¤­à¥
+à¤¸à¤¾à¤¥
+à¤¸à¤¾à¤¬à¥à¤¤
+à¤¸à¤¾à¤­
+à¤¸à¤¾à¤°à¤¾
+à¤¸à¥
+à¤¸à¥
+à¤¹à¤¿
+à¤¹à¥
+à¤¹à¥à¤
+à¤¹à¥à¤
+à¤¹à¥à¤
+à¤¹à¥à¤
+à¤¹à¥à¤
+à¤¹à¥
+à¤¹à¥à¤
+à¤¹à¥
+à¤¹à¥à¤
+à¤¹à¥
+à¤¹à¥à¤¤à¤¾
+à¤¹à¥à¤¤à¤¿
+à¤¹à¥à¤¤à¥
+à¤¹à¥à¤¤à¥
+à¤¹à¥à¤¨à¤¾
+à¤¹à¥à¤¨à¥
\ No newline at end of file
diff --git a/static/stopwords/hr b/static/stopwords/hr
new file mode 100644
index 0000000..64388b0
--- /dev/null
+++ b/static/stopwords/hr
@@ -0,0 +1,179 @@
+a
+ako
+ali
+bi
+bih
+bila
+bili
+bilo
+bio
+bismo
+biste
+biti
+bumo
+da
+do
+duÅ¾
+ga
+hoÄe
+hoÄemo
+hoÄete
+hoÄeÅ¡
+hoÄu
+i
+iako
+ih
+ili
+iz
+ja
+je
+jedna
+jedne
+jedno
+jer
+jesam
+jesi
+jesmo
+jest
+jeste
+jesu
+jim
+joj
+joÅ¡
+ju
+kada
+kako
+kao
+koja
+koje
+koji
+kojima
+koju
+kroz
+li
+me
+mene
+meni
+mi
+mimo
+moj
+moja
+moje
+mu
+na
+nad
+nakon
+nam
+nama
+nas
+naÅ¡
+naÅ¡a
+naÅ¡e
+naÅ¡eg
+ne
+nego
+neka
+neki
+nekog
+neku
+nema
+netko
+neÄe
+neÄemo
+neÄete
+neÄeÅ¡
+neÄu
+neÅ¡to
+ni
+nije
+nikoga
+nikoje
+nikoju
+nisam
+nisi
+nismo
+niste
+nisu
+njega
+njegov
+njegova
+njegovo
+njemu
+njezin
+njezina
+njezino
+njih
+njihov
+njihova
+njihovo
+njim
+njima
+njoj
+nju
+no
+o
+od
+odmah
+on
+ona
+oni
+ono
+ova
+pa
+pak
+po
+pod
+pored
+prije
+s
+sa
+sam
+samo
+se
+sebe
+sebi
+si
+smo
+ste
+su
+sve
+svi
+svog
+svoj
+svoja
+svoje
+svom
+ta
+tada
+taj
+tako
+te
+tebe
+tebi
+ti
+to
+toj
+tome
+tu
+tvoj
+tvoja
+tvoje
+u
+uz
+vam
+vama
+vas
+vaÅ¡
+vaÅ¡a
+vaÅ¡e
+veÄ
+vi
+vrlo
+za
+zar
+Äe
+Äemo
+Äete
+ÄeÅ¡
+Äu
+Å¡to
\ No newline at end of file
diff --git a/static/stopwords/hu b/static/stopwords/hu
new file mode 100644
index 0000000..9651ead
--- /dev/null
+++ b/static/stopwords/hu
@@ -0,0 +1,789 @@
+a
+abba
+abban
+abbÃ³l
+addig
+ahhoz
+ahogy
+ahol
+aki
+akik
+akkor
+akÃ¡r
+alapjÃ¡n
+alatt
+alatta
+alattad
+alattam
+alattatok
+alattuk
+alattunk
+alÃ¡
+alÃ¡d
+alÃ¡juk
+alÃ¡m
+alÃ¡nk
+alÃ¡tok
+alÃ³l
+alÃ³la
+alÃ³lad
+alÃ³lam
+alÃ³latok
+alÃ³luk
+alÃ³lunk
+amely
+amelybol
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelyik
+amelynek
+ami
+amikor
+amit
+amolyan
+amott
+amÃ­g
+annak
+annÃ¡l
+arra
+arrÃ³l
+attÃ³l
+az
+aznap
+azok
+azokat
+azokba
+azokban
+azokbÃ³l
+azokhoz
+azokig
+azokkal
+azokkÃ¡
+azoknak
+azoknÃ¡l
+azokon
+azokra
+azokrÃ³l
+azoktÃ³l
+azokÃ©rt
+azon
+azonban
+azonnal
+azt
+aztÃ¡n
+azutÃ¡n
+azzal
+azzÃ¡
+azÃ©rt
+bal
+balra
+ban
+be
+belÃ©
+belÃ©d
+belÃ©jÃ¼k
+belÃ©m
+belÃ©nk
+belÃ©tek
+belÃ¼l
+belÅle
+belÅled
+belÅlem
+belÅletek
+belÅlÃ¼k
+belÅlÃ¼nk
+ben
+benne
+benned
+bennem
+bennetek
+bennÃ¼k
+bennÃ¼nk
+bÃ¡r
+bÃ¡rcsak
+bÃ¡rmilyen
+bÃºcsÃº
+cikk
+cikkek
+cikkeket
+csak
+csakhogy
+csupÃ¡n
+de
+dehogy
+e
+ebbe
+ebben
+ebbÅl
+eddig
+egy
+egyebek
+egyebet
+egyedÃ¼l
+egyelÅre
+egyes
+egyet
+egyetlen
+egyik
+egymÃ¡s
+egyre
+egyszerre
+egyÃ©b
+egyÃ¼tt
+egÃ©sz
+egÃ©szen
+ehhez
+ekkor
+el
+eleinte
+ellen
+ellenes
+elleni
+ellenÃ©re
+elmondta
+elsÃµ
+elsÅ
+elsÅk
+elsÅsorban
+elsÅt
+elÃ©
+elÃ©d
+elÃ©g
+elÃ©jÃ¼k
+elÃ©m
+elÃ©nk
+elÃ©tek
+elÃµ
+elÃµszÃ¶r
+elÃµtt
+elÅ
+elÅbb
+elÅl
+elÅle
+elÅled
+elÅlem
+elÅletek
+elÅlÃ¼k
+elÅlÃ¼nk
+elÅszÃ¶r
+elÅtt
+elÅtte
+elÅtted
+elÅttem
+elÅttetek
+elÅttÃ¼k
+elÅttÃ¼nk
+elÅzÅ
+emilyen
+engem
+ennek
+ennyi
+ennÃ©l
+enyÃ©m
+erre
+errÅl
+esetben
+ettÅl
+ez
+ezek
+ezekbe
+ezekben
+ezekbÅl
+ezeken
+ezeket
+ezekhez
+ezekig
+ezekkel
+ezekkÃ©
+ezeknek
+ezeknÃ©l
+ezekre
+ezekrÅl
+ezektÅl
+ezekÃ©rt
+ezen
+ezentÃºl
+ezer
+ezret
+ezt
+ezutÃ¡n
+ezzel
+ezzÃ©
+ezÃ©rt
+fel
+fele
+felek
+felet
+felett
+felÃ©
+fent
+fenti
+fÃ©l
+fÃ¶lÃ©
+gyakran
+ha
+hallÃ³
+hamar
+hanem
+harmadik
+harmadikat
+harminc
+hat
+hatodik
+hatodikat
+hatot
+hatvan
+helyett
+hetedik
+hetediket
+hetet
+hetven
+hirtelen
+hiszen
+hiÃ¡ba
+hogy
+hogyan
+hol
+holnap
+holnapot
+honnan
+hova
+hozzÃ¡
+hozzÃ¡d
+hozzÃ¡juk
+hozzÃ¡m
+hozzÃ¡nk
+hozzÃ¡tok
+hurrÃ¡
+huszadik
+hÃ¡ny
+hÃ¡nyszor
+hÃ¡rmat
+hÃ¡rom
+hÃ¡t
+hÃ¡tha
+hÃ¡tulsÃ³
+hÃ©t
+hÃºsz
+ide
+ide-Ð¾da
+idÃ©n
+igazÃ¡n
+igen
+ill
+ill.
+illetve
+ilyen
+ilyenkor
+immÃ¡r
+inkÃ¡bb
+is
+ismÃ©t
+ison
+itt
+jelenleg
+jobban
+jobbra
+jÃ³
+jÃ³l
+jÃ³lesik
+jÃ³val
+jÃ¶vÅre
+kell
+kellene
+kellett
+kelljen
+keressÃ¼nk
+keresztÃ¼l
+ketten
+kettÅ
+kettÅt
+kevÃ©s
+ki
+kiben
+kibÅl
+kicsit
+kicsoda
+kihez
+kik
+kikbe
+kikben
+kikbÅl
+kiken
+kiket
+kikhez
+kikkel
+kikkÃ©
+kiknek
+kiknÃ©l
+kikre
+kikrÅl
+kiktÅl
+kikÃ©rt
+kilenc
+kilencedik
+kilencediket
+kilencet
+kilencven
+kin
+kinek
+kinÃ©l
+kire
+kirÅl
+kit
+kitÅl
+kivel
+kivÃ©
+kiÃ©
+kiÃ©rt
+korÃ¡bban
+kÃ©pest
+kÃ©rem
+kÃ©rlek
+kÃ©sz
+kÃ©sÅ
+kÃ©sÅbb
+kÃ©sÅn
+kÃ©t
+kÃ©tszer
+kÃ­vÃ¼l
+kÃ¶rÃ¼l
+kÃ¶szÃ¶nhetÅen
+kÃ¶szÃ¶nÃ¶m
+kÃ¶zben
+kÃ¶zel
+kÃ¶zepesen
+kÃ¶zepÃ©n
+kÃ¶zÃ©
+kÃ¶zÃ¶tt
+kÃ¶zÃ¼l
+kÃ¼lÃ¶n
+kÃ¼lÃ¶nben
+kÃ¼lÃ¶nbÃ¶zÅ
+kÃ¼lÃ¶nbÃ¶zÅbb
+kÃ¼lÃ¶nbÃ¶zÅek
+lassan
+le
+legalÃ¡bb
+legyen
+lehet
+lehetetlen
+lehetett
+lehetÅleg
+lehetÅsÃ©g
+lenne
+lenni
+lennÃ©k
+lennÃ©nek
+lesz
+leszek
+lesznek
+leszÃ¼nk
+lett
+lettek
+lettem
+lettÃ¼nk
+lÃ©vÅ
+ma
+maga
+magad
+magam
+magatokat
+magukat
+magunkat
+magÃ¡t
+mai
+majd
+majdnem
+manapsÃ¡g
+meg
+megcsinÃ¡l
+megcsinÃ¡lnak
+megint
+megvan
+mellett
+mellette
+melletted
+mellettem
+mellettetek
+mellettÃ¼k
+mellettÃ¼nk
+mellÃ©
+mellÃ©d
+mellÃ©jÃ¼k
+mellÃ©m
+mellÃ©nk
+mellÃ©tek
+mellÅl
+mellÅle
+mellÅled
+mellÅlem
+mellÅletek
+mellÅlÃ¼k
+mellÅlÃ¼nk
+mely
+melyek
+melyik
+mennyi
+mert
+mi
+miatt
+miatta
+miattad
+miattam
+miattatok
+miattuk
+miattunk
+mibe
+miben
+mibÅl
+mihez
+mik
+mikbe
+mikben
+mikbÅl
+miken
+miket
+mikhez
+mikkel
+mikkÃ©
+miknek
+miknÃ©l
+mikor
+mikre
+mikrÅl
+miktÅl
+mikÃ©rt
+milyen
+min
+mind
+mindegyik
+mindegyiket
+minden
+mindenesetre
+mindenki
+mindent
+mindenÃ¼tt
+mindig
+mindketten
+minek
+minket
+mint
+mintha
+minÃ©l
+mire
+mirÅl
+mit
+mitÅl
+mivel
+mivÃ©
+miÃ©rt
+mondta
+most
+mostanÃ¡ig
+mÃ¡r
+mÃ¡s
+mÃ¡sik
+mÃ¡sikat
+mÃ¡snap
+mÃ¡sodik
+mÃ¡sodszor
+mÃ¡sok
+mÃ¡sokat
+mÃ¡st
+mÃ©g
+mÃ©gis
+mÃ­g
+mÃ¶gÃ©
+mÃ¶gÃ©d
+mÃ¶gÃ©jÃ¼k
+mÃ¶gÃ©m
+mÃ¶gÃ©nk
+mÃ¶gÃ©tek
+mÃ¶gÃ¶tt
+mÃ¶gÃ¶tte
+mÃ¶gÃ¶tted
+mÃ¶gÃ¶ttem
+mÃ¶gÃ¶ttetek
+mÃ¶gÃ¶ttÃ¼k
+mÃ¶gÃ¶ttÃ¼nk
+mÃ¶gÃ¼l
+mÃ¶gÃ¼le
+mÃ¶gÃ¼led
+mÃ¶gÃ¼lem
+mÃ¶gÃ¼letek
+mÃ¶gÃ¼lÃ¼k
+mÃ¶gÃ¼lÃ¼nk
+mÃºltkor
+mÃºlva
+na
+nagy
+nagyobb
+nagyon
+naponta
+napot
+ne
+negyedik
+negyediket
+negyven
+neked
+nekem
+neki
+nekik
+nektek
+nekÃ¼nk
+nem
+nemcsak
+nemrÃ©g
+nincs
+nyolc
+nyolcadik
+nyolcadikat
+nyolcat
+nyolcvan
+nÃ¡la
+nÃ¡lad
+nÃ¡lam
+nÃ¡latok
+nÃ¡luk
+nÃ¡lunk
+nÃ©gy
+nÃ©gyet
+nÃ©ha
+nÃ©hÃ¡ny
+nÃ©lkÃ¼l
+o
+oda
+ok
+olyan
+onnan
+ott
+pedig
+persze
+pÃ¡r
+pÃ©ldÃ¡ul
+rajta
+rajtad
+rajtam
+rajtatok
+rajtuk
+rajtunk
+rendben
+rosszul
+rÃ¡
+rÃ¡d
+rÃ¡juk
+rÃ¡m
+rÃ¡nk
+rÃ¡tok
+rÃ©gen
+rÃ©gÃ³ta
+rÃ©szÃ©re
+rÃ³la
+rÃ³lad
+rÃ³lam
+rÃ³latok
+rÃ³luk
+rÃ³lunk
+rÃ¶gtÃ¶n
+s
+sajÃ¡t
+se
+sem
+semmi
+semmilyen
+semmisÃ©g
+senki
+soha
+sok
+sokan
+sokat
+sokkal
+sokszor
+sokÃ¡ig
+sorÃ¡n
+stb.
+szemben
+szerbusz
+szerint
+szerinte
+szerinted
+szerintem
+szerintetek
+szerintÃ¼k
+szerintÃ¼nk
+szervusz
+szinte
+szÃ¡mÃ¡ra
+szÃ¡z
+szÃ¡zadik
+szÃ¡zat
+szÃ©pen
+szÃ©t
+szÃ­ves
+szÃ­vesen
+szÃ­veskedjÃ©k
+sÅt
+talÃ¡n
+tavaly
+te
+tegnap
+tegnapelÅtt
+tehÃ¡t
+tele
+teljes
+tessÃ©k
+ti
+tied
+titeket
+tizedik
+tizediket
+tizenegy
+tizenegyedik
+tizenhat
+tizenhÃ¡rom
+tizenhÃ©t
+tizenkettedik
+tizenkettÅ
+tizenkilenc
+tizenkÃ©t
+tizennyolc
+tizennÃ©gy
+tizenÃ¶t
+tizet
+tovÃ¡bb
+tovÃ¡bbi
+tovÃ¡bbÃ¡
+tÃ¡vol
+tÃ©ged
+tÃ©nyleg
+tÃ­z
+tÃ¶bb
+tÃ¶bbi
+tÃ¶bbszÃ¶r
+tÃºl
+tÅle
+tÅled
+tÅlem
+tÅletek
+tÅlÃ¼k
+tÅlÃ¼nk
+ugyanakkor
+ugyanez
+ugyanis
+ugye
+urak
+uram
+urat
+utoljÃ¡ra
+utolsÃ³
+utÃ¡n
+utÃ¡na
+vagy
+vagyis
+vagyok
+vagytok
+vagyunk
+vajon
+valahol
+valaki
+valakit
+valamelyik
+valami
+valamint
+valÃ³
+van
+vannak
+vele
+veled
+velem
+veletek
+velÃ¼k
+velÃ¼nk
+vissza
+viszlÃ¡t
+viszont
+viszontlÃ¡tÃ¡sra
+volna
+volnÃ¡nak
+volnÃ©k
+volt
+voltak
+voltam
+voltunk
+vÃ©gre
+vÃ©gÃ©n
+vÃ©gÃ¼l
+Ã¡ltal
+Ã¡ltalÃ¡ban
+Ã¡m
+Ã¡t
+Ã©ljen
+Ã©n
+Ã©ppen
+Ã©rte
+Ã©rted
+Ã©rtem
+Ã©rtetek
+Ã©rtÃ¼k
+Ã©rtÃ¼nk
+Ã©s
+Ã©v
+Ã©vben
+Ã©ve
+Ã©vek
+Ã©ves
+Ã©vi
+Ã©vvel
+Ã­gy
+Ã³ta
+Ãµ
+Ãµk
+Ãµket
+Ã¶n
+Ã¶nbe
+Ã¶nben
+Ã¶nbÅl
+Ã¶nhÃ¶z
+Ã¶nnek
+Ã¶nnel
+Ã¶nnÃ©l
+Ã¶nre
+Ã¶nrÅl
+Ã¶nt
+Ã¶ntÅl
+Ã¶nÃ©rt
+Ã¶nÃ¶k
+Ã¶nÃ¶kbe
+Ã¶nÃ¶kben
+Ã¶nÃ¶kbÅl
+Ã¶nÃ¶ket
+Ã¶nÃ¶khÃ¶z
+Ã¶nÃ¶kkel
+Ã¶nÃ¶knek
+Ã¶nÃ¶knÃ©l
+Ã¶nÃ¶kre
+Ã¶nÃ¶krÅl
+Ã¶nÃ¶ktÅl
+Ã¶nÃ¶kÃ©rt
+Ã¶nÃ¶kÃ¶n
+Ã¶nÃ¶n
+Ã¶ssze
+Ã¶t
+Ã¶tven
+Ã¶tÃ¶dik
+Ã¶tÃ¶diket
+Ã¶tÃ¶t
+Ãºgy
+Ãºgyis
+Ãºgynevezett
+Ãºj
+Ãºjabb
+Ãºjra
+Ãºr
+Å
+Åk
+Åket
+Åt
\ No newline at end of file
diff --git a/static/stopwords/hy b/static/stopwords/hy
new file mode 100644
index 0000000..327af43
--- /dev/null
+++ b/static/stopwords/hy
@@ -0,0 +1,45 @@
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö
+Õ¤Õ¸ÖÖ
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö
+Õ¥Õ½
+Õ¥Ö
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö
+Õ§Õ«Ö
+Õ§Õ«Ö
+Õ§Ö
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö
+Õ¶ÖÕ¡
+Õ¶ÖÕ¡Õ¶Ö
+Õ¸Ö
+Õ¸ÖÕ¨
+Õ¸ÖÕ¸Õ¶Ö
+Õ¸ÖÕºÕ¥Õ½
+Õ¸Ö
+Õ¸ÖÕ´
+ÕºÕ«Õ¿Õ«
+Õ¾ÖÕ¡
+Ö
\ No newline at end of file
diff --git a/static/stopwords/id b/static/stopwords/id
new file mode 100644
index 0000000..28b6fe3
--- /dev/null
+++ b/static/stopwords/id
@@ -0,0 +1,758 @@
+ada
+adalah
+adanya
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhir
+akhiri
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+antara
+antaranya
+apa
+apaan
+apabila
+apakah
+apalagi
+apatah
+artinya
+asal
+asalkan
+atas
+atau
+ataukah
+ataupun
+awal
+awalnya
+bagai
+bagaikan
+bagaimana
+bagaimanakah
+bagaimanapun
+bagi
+bagian
+bahkan
+bahwa
+bahwasanya
+baik
+bakal
+bakalan
+balik
+banyak
+bapak
+baru
+bawah
+beberapa
+begini
+beginian
+beginikah
+beginilah
+begitu
+begitukah
+begitulah
+begitupun
+bekerja
+belakang
+belakangan
+belum
+belumlah
+benar
+benarkah
+benarlah
+berada
+berakhir
+berakhirlah
+berakhirnya
+berapa
+berapakah
+berapalah
+berapapun
+berarti
+berawal
+berbagai
+berdatangan
+beri
+berikan
+berikut
+berikutnya
+berjumlah
+berkali-kali
+berkata
+berkehendak
+berkeinginan
+berkenaan
+berlainan
+berlalu
+berlangsung
+berlebihan
+bermacam
+bermacam-macam
+bermaksud
+bermula
+bersama
+bersama-sama
+bersiap
+bersiap-siap
+bertanya
+bertanya-tanya
+berturut
+berturut-turut
+bertutur
+berujar
+berupa
+besar
+betul
+betulkah
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+bulan
+bung
+cara
+caranya
+cukup
+cukupkah
+cukuplah
+cuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+datang
+dekat
+demi
+demikian
+demikianlah
+dengan
+depan
+di
+dia
+diakhiri
+diakhirinya
+dialah
+diantara
+diantaranya
+diberi
+diberikan
+diberikannya
+dibuat
+dibuatnya
+didapat
+didatangkan
+digunakan
+diibaratkan
+diibaratkannya
+diingat
+diingatkan
+diinginkan
+dijawab
+dijelaskan
+dijelaskannya
+dikarenakan
+dikatakan
+dikatakannya
+dikerjakan
+diketahui
+diketahuinya
+dikira
+dilakukan
+dilalui
+dilihat
+dimaksud
+dimaksudkan
+dimaksudkannya
+dimaksudnya
+diminta
+dimintai
+dimisalkan
+dimulai
+dimulailah
+dimulainya
+dimungkinkan
+dini
+dipastikan
+diperbuat
+diperbuatnya
+dipergunakan
+diperkirakan
+diperlihatkan
+diperlukan
+diperlukannya
+dipersoalkan
+dipertanyakan
+dipunyai
+diri
+dirinya
+disampaikan
+disebut
+disebutkan
+disebutkannya
+disini
+disinilah
+ditambahkan
+ditandaskan
+ditanya
+ditanyai
+ditanyakan
+ditegaskan
+ditujukan
+ditunjuk
+ditunjuki
+ditunjukkan
+ditunjukkannya
+ditunjuknya
+dituturkan
+dituturkannya
+diucapkan
+diucapkannya
+diungkapkan
+dong
+dua
+dulu
+empat
+enggak
+enggaknya
+entah
+entahlah
+guna
+gunakan
+hal
+hampir
+hanya
+hanyalah
+hari
+harus
+haruslah
+harusnya
+hendak
+hendaklah
+hendaknya
+hingga
+ia
+ialah
+ibarat
+ibaratkan
+ibaratnya
+ibu
+ikut
+ingat
+ingat-ingat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jadi
+jadilah
+jadinya
+jangan
+jangankan
+janganlah
+jauh
+jawab
+jawaban
+jawabnya
+jelas
+jelaskan
+jelaslah
+jelasnya
+jika
+jikalau
+juga
+jumlah
+jumlahnya
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+karena
+karenanya
+kasus
+kata
+katakan
+katakanlah
+katanya
+ke
+keadaan
+kebetulan
+kecil
+kedua
+keduanya
+keinginan
+kelamaan
+kelihatan
+kelihatannya
+kelima
+keluar
+kembali
+kemudian
+kemungkinan
+kemungkinannya
+kenapa
+kepada
+kepadanya
+kesampaian
+keseluruhan
+keseluruhannya
+keterlaluan
+ketika
+khususnya
+kini
+kinilah
+kira
+kira-kira
+kiranya
+kita
+kitalah
+kok
+kurang
+lagi
+lagian
+lah
+lain
+lainnya
+lalu
+lama
+lamanya
+lanjut
+lanjutnya
+lebih
+lewat
+lima
+luar
+macam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masa
+masalah
+masalahnya
+masih
+masihkah
+masing
+masing-masing
+mau
+maupun
+melainkan
+melakukan
+melalui
+melihat
+melihatnya
+memang
+memastikan
+memberi
+memberikan
+membuat
+memerlukan
+memihak
+meminta
+memintakan
+memisalkan
+memperbuat
+mempergunakan
+memperkirakan
+memperlihatkan
+mempersiapkan
+mempersoalkan
+mempertanyakan
+mempunyai
+memulai
+memungkinkan
+menaiki
+menambahkan
+menandaskan
+menanti
+menanti-nanti
+menantikan
+menanya
+menanyai
+menanyakan
+mendapat
+mendapatkan
+mendatang
+mendatangi
+mendatangkan
+menegaskan
+mengakhiri
+mengapa
+mengatakan
+mengatakannya
+mengenai
+mengerjakan
+mengetahui
+menggunakan
+menghendaki
+mengibaratkan
+mengibaratkannya
+mengingat
+mengingatkan
+menginginkan
+mengira
+mengucapkan
+mengucapkannya
+mengungkapkan
+menjadi
+menjawab
+menjelaskan
+menuju
+menunjuk
+menunjuki
+menunjukkan
+menunjuknya
+menurut
+menuturkan
+menyampaikan
+menyangkut
+menyatakan
+menyebutkan
+menyeluruh
+menyiapkan
+merasa
+mereka
+merekalah
+merupakan
+meski
+meskipun
+meyakini
+meyakinkan
+minta
+mirip
+misal
+misalkan
+misalnya
+mula
+mulai
+mulailah
+mulanya
+mungkin
+mungkinkah
+nah
+naik
+namun
+nanti
+nantinya
+nyaris
+nyatanya
+oleh
+olehnya
+pada
+padahal
+padanya
+pak
+paling
+panjang
+pantas
+para
+pasti
+pastilah
+penting
+pentingnya
+per
+percuma
+perlu
+perlukah
+perlunya
+pernah
+persoalan
+pertama
+pertama-tama
+pertanyaan
+pertanyakan
+pihak
+pihaknya
+pukul
+pula
+pun
+punya
+rasa
+rasanya
+rata
+rupanya
+saat
+saatnya
+saja
+sajalah
+saling
+sama
+sama-sama
+sambil
+sampai
+sampai-sampai
+sampaikan
+sana
+sangat
+sangatlah
+satu
+saya
+sayalah
+se
+sebab
+sebabnya
+sebagai
+sebagaimana
+sebagainya
+sebagian
+sebaik
+sebaik-baiknya
+sebaiknya
+sebaliknya
+sebanyak
+sebegini
+sebegitu
+sebelum
+sebelumnya
+sebenarnya
+seberapa
+sebesar
+sebetulnya
+sebisanya
+sebuah
+sebut
+sebutlah
+sebutnya
+secara
+secukupnya
+sedang
+sedangkan
+sedemikian
+sedikit
+sedikitnya
+seenaknya
+segala
+segalanya
+segera
+seharusnya
+sehingga
+seingat
+sejak
+sejauh
+sejenak
+sejumlah
+sekadar
+sekadarnya
+sekali
+sekali-kali
+sekalian
+sekaligus
+sekalipun
+sekarang
+sekecil
+seketika
+sekiranya
+sekitar
+sekitarnya
+sekurang-kurangnya
+sekurangnya
+sela
+selagi
+selain
+selaku
+selalu
+selama
+selama-lamanya
+selamanya
+selanjutnya
+seluruh
+seluruhnya
+semacam
+semakin
+semampu
+semampunya
+semasa
+semasih
+semata
+semata-mata
+semaunya
+sementara
+semisal
+semisalnya
+sempat
+semua
+semuanya
+semula
+sendiri
+sendirian
+sendirinya
+seolah
+seolah-olah
+seorang
+sepanjang
+sepantasnya
+sepantasnyalah
+seperlunya
+seperti
+sepertinya
+sepihak
+sering
+seringnya
+serta
+serupa
+sesaat
+sesama
+sesampai
+sesegera
+sesekali
+seseorang
+sesuatu
+sesuatunya
+sesudah
+sesudahnya
+setelah
+setempat
+setengah
+seterusnya
+setiap
+setiba
+setibanya
+setidak-tidaknya
+setidaknya
+setinggi
+seusai
+sewaktu
+siap
+siapa
+siapakah
+siapapun
+sini
+sinilah
+soal
+soalnya
+suatu
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tahu
+tahun
+tak
+tambah
+tambahnya
+tampak
+tampaknya
+tandas
+tandasnya
+tanpa
+tanya
+tanyakan
+tanyanya
+tapi
+tegas
+tegasnya
+telah
+tempat
+tengah
+tentang
+tentu
+tentulah
+tentunya
+tepat
+terakhir
+terasa
+terbanyak
+terdahulu
+terdapat
+terdiri
+terhadap
+terhadapnya
+teringat
+teringat-ingat
+terjadi
+terjadilah
+terjadinya
+terkira
+terlalu
+terlebih
+terlihat
+termasuk
+ternyata
+tersampaikan
+tersebut
+tersebutlah
+tertentu
+tertuju
+terus
+terutama
+tetap
+tetapi
+tiap
+tiba
+tiba-tiba
+tidak
+tidakkah
+tidaklah
+tiga
+tinggi
+toh
+tunjuk
+turut
+tutur
+tuturnya
+ucap
+ucapnya
+ujar
+ujarnya
+umum
+umumnya
+ungkap
+ungkapnya
+untuk
+usah
+usai
+waduh
+wah
+wahai
+waktu
+waktunya
+walau
+walaupun
+wong
+yaitu
+yakin
+yakni
+yang
\ No newline at end of file
diff --git a/static/stopwords/it b/static/stopwords/it
new file mode 100644
index 0000000..1e1f967
--- /dev/null
+++ b/static/stopwords/it
@@ -0,0 +1,632 @@
+a
+abbastanza
+abbia
+abbiamo
+abbiano
+abbiate
+accidenti
+ad
+adesso
+affinchÃ©
+agl
+agli
+ahime
+ahimÃ¨
+ai
+al
+alcuna
+alcuni
+alcuno
+all
+alla
+alle
+allo
+allora
+altre
+altri
+altrimenti
+altro
+altrove
+altrui
+anche
+ancora
+anni
+anno
+ansa
+anticipo
+assai
+attesa
+attraverso
+avanti
+avemmo
+avendo
+avente
+aver
+avere
+averlo
+avesse
+avessero
+avessi
+avessimo
+aveste
+avesti
+avete
+aveva
+avevamo
+avevano
+avevate
+avevi
+avevo
+avrai
+avranno
+avrebbe
+avrebbero
+avrei
+avremmo
+avremo
+avreste
+avresti
+avrete
+avrÃ 
+avrÃ²
+avuta
+avute
+avuti
+avuto
+basta
+ben
+bene
+benissimo
+brava
+bravo
+buono
+c
+caso
+cento
+certa
+certe
+certi
+certo
+che
+chi
+chicchessia
+chiunque
+ci
+ciascuna
+ciascuno
+cima
+cinque
+cio
+cioe
+cioÃ¨
+circa
+citta
+cittÃ 
+ciÃ²
+co
+codesta
+codesti
+codesto
+cogli
+coi
+col
+colei
+coll
+coloro
+colui
+come
+cominci
+comprare
+comunque
+con
+concernente
+conclusione
+consecutivi
+consecutivo
+consiglio
+contro
+cortesia
+cos
+cosa
+cosi
+cosÃ¬
+cui
+d
+da
+dagl
+dagli
+dai
+dal
+dall
+dalla
+dalle
+dallo
+dappertutto
+davanti
+degl
+degli
+dei
+del
+dell
+della
+delle
+dello
+dentro
+detto
+deve
+devo
+di
+dice
+dietro
+dire
+dirimpetto
+diventa
+diventare
+diventato
+dopo
+doppio
+dov
+dove
+dovra
+dovrÃ 
+dovunque
+due
+dunque
+durante
+e
+ebbe
+ebbero
+ebbi
+ecc
+ecco
+ed
+effettivamente
+egli
+ella
+entrambi
+eppure
+era
+erano
+eravamo
+eravate
+eri
+ero
+esempio
+esse
+essendo
+esser
+essere
+essi
+ex
+fa
+faccia
+facciamo
+facciano
+facciate
+faccio
+facemmo
+facendo
+facesse
+facessero
+facessi
+facessimo
+faceste
+facesti
+faceva
+facevamo
+facevano
+facevate
+facevi
+facevo
+fai
+fanno
+farai
+faranno
+fare
+farebbe
+farebbero
+farei
+faremmo
+faremo
+fareste
+faresti
+farete
+farÃ 
+farÃ²
+fatto
+favore
+fece
+fecero
+feci
+fin
+finalmente
+finche
+fine
+fino
+forse
+forza
+fosse
+fossero
+fossi
+fossimo
+foste
+fosti
+fra
+frattempo
+fu
+fui
+fummo
+fuori
+furono
+futuro
+generale
+gente
+gia
+giacche
+giorni
+giorno
+giu
+giÃ 
+gli
+gliela
+gliele
+glieli
+glielo
+gliene
+grande
+grazie
+gruppo
+ha
+haha
+hai
+hanno
+ho
+i
+ie
+ieri
+il
+improvviso
+in
+inc
+indietro
+infatti
+inoltre
+insieme
+intanto
+intorno
+invece
+io
+l
+la
+lasciato
+lato
+le
+lei
+li
+lo
+lontano
+loro
+lui
+lungo
+luogo
+lÃ 
+ma
+macche
+magari
+maggior
+mai
+male
+malgrado
+malissimo
+me
+medesimo
+mediante
+meglio
+meno
+mentre
+mesi
+mezzo
+mi
+mia
+mie
+miei
+mila
+miliardi
+milioni
+minimi
+mio
+modo
+molta
+molti
+moltissimo
+molto
+momento
+mondo
+ne
+negl
+negli
+nei
+nel
+nell
+nella
+nelle
+nello
+nemmeno
+neppure
+nessun
+nessuna
+nessuno
+niente
+no
+noi
+nome
+non
+nondimeno
+nonostante
+nonsia
+nostra
+nostre
+nostri
+nostro
+novanta
+nove
+nulla
+nuovi
+nuovo
+o
+od
+oggi
+ogni
+ognuna
+ognuno
+oltre
+oppure
+ora
+ore
+osi
+ossia
+ottanta
+otto
+paese
+parecchi
+parecchie
+parecchio
+parte
+partendo
+peccato
+peggio
+per
+perche
+perchÃ¨
+perchÃ©
+percio
+perciÃ²
+perfino
+pero
+persino
+persone
+perÃ²
+piedi
+pieno
+piglia
+piu
+piuttosto
+piÃ¹
+po
+pochissimo
+poco
+poi
+poiche
+possa
+possedere
+posteriore
+posto
+potrebbe
+preferibilmente
+presa
+press
+prima
+primo
+principalmente
+probabilmente
+promesso
+proprio
+puo
+pure
+purtroppo
+puÃ²
+qua
+qualche
+qualcosa
+qualcuna
+qualcuno
+quale
+quali
+qualunque
+quando
+quanta
+quante
+quanti
+quanto
+quantunque
+quarto
+quasi
+quattro
+quel
+quella
+quelle
+quelli
+quello
+quest
+questa
+queste
+questi
+questo
+qui
+quindi
+quinto
+realmente
+recente
+recentemente
+registrazione
+relativo
+riecco
+rispetto
+salvo
+sara
+sarai
+saranno
+sarebbe
+sarebbero
+sarei
+saremmo
+saremo
+sareste
+saresti
+sarete
+sarÃ 
+sarÃ²
+scola
+scopo
+scorso
+se
+secondo
+seguente
+seguito
+sei
+sembra
+sembrare
+sembrato
+sembrava
+sembri
+sempre
+senza
+sette
+si
+sia
+siamo
+siano
+siate
+siete
+sig
+solito
+solo
+soltanto
+sono
+sopra
+soprattutto
+sotto
+spesso
+sta
+stai
+stando
+stanno
+starai
+staranno
+starebbe
+starebbero
+starei
+staremmo
+staremo
+stareste
+staresti
+starete
+starÃ 
+starÃ²
+stata
+state
+stati
+stato
+stava
+stavamo
+stavano
+stavate
+stavi
+stavo
+stemmo
+stessa
+stesse
+stessero
+stessi
+stessimo
+stesso
+steste
+stesti
+stette
+stettero
+stetti
+stia
+stiamo
+stiano
+stiate
+sto
+su
+sua
+subito
+successivamente
+successivo
+sue
+sugl
+sugli
+sui
+sul
+sull
+sulla
+sulle
+sullo
+suo
+suoi
+tale
+tali
+talvolta
+tanto
+te
+tempo
+terzo
+th
+ti
+titolo
+tra
+tranne
+tre
+trenta
+triplo
+troppo
+trovato
+tu
+tua
+tue
+tuo
+tuoi
+tutta
+tuttavia
+tutte
+tutti
+tutto
+uguali
+ulteriore
+ultimo
+un
+una
+uno
+uomo
+va
+vai
+vale
+vari
+varia
+varie
+vario
+verso
+vi
+vicino
+visto
+vita
+voi
+volta
+volte
+vostra
+vostre
+vostri
+vostro
+Ã¨
\ No newline at end of file
diff --git a/static/stopwords/ja b/static/stopwords/ja
new file mode 100644
index 0000000..38735ee
--- /dev/null
+++ b/static/stopwords/ja
@@ -0,0 +1,134 @@
+ããã
+ãã£
+ãã®
+ãã®ãã
+ãã®äºº
+ãã
+ããã¾ã
+ãã
+ãã
+ã
+ãã
+ãã¾ã
+ãã
+ã
+ãã¡
+ã
+ã
+ããã³
+ãã
+ããã¾ã
+ã
+ãã¤ã¦
+ãã
+ã
+ã
+ãã
+ãã¡ã
+ãã¨
+ãã®
+ãã
+ããã
+ã
+ããã«
+ã
+ããã
+ãã
+ã
+ã
+ãã
+ãã
+ããã¦
+ãã®
+ãã®ä»
+ãã®å¾
+ãã
+ãããã
+ããã§
+ã
+ãã ã
+ãã¡
+ãã
+ãã
+ã 
+ã ã£
+ã ã
+ã¤
+ã¦
+ã§
+ã§ã
+ã§ãã
+ã§ã
+ã§ã¯
+ã§ã
+ã¨
+ã¨ãã
+ã¨ãã£ã
+ã¨ã
+ã¨ãã
+ã¨ãã¦
+ã¨ã¨ãã«
+ã¨ã
+ã¨å±ã«
+ã©ã
+ã©ã®
+ãª
+ãªã
+ãªã
+ãªãã£
+ãªãã
+ãªã
+ãªã£
+ãªã©
+ãªã«
+ãªã
+ãªã
+ãªã
+ãªã
+ã«
+ã«ããã¦
+ã«ããã
+ã«ã¤ãã¦
+ã«ã¦
+ã«ãã£ã¦
+ã«ãã
+ã«ãã
+ã«å¯¾ãã¦
+ã«å¯¾ãã
+ã«é¢ãã
+ã®
+ã®ã§
+ã®ã¿
+ã¯
+ã°
+ã¸
+ã»ã
+ã»ã¨ãã©
+ã»ã©
+ã¾ã
+ã¾ã
+ã¾ãã¯
+ã¾ã§
+ã
+ãã®
+ãã®ã®
+ã
+ãã
+ãã
+ã
+ãã
+ããã
+ã
+ãã
+ã
+ã
+ä½
+åã³
+å½¼
+å½¼å¥³
+æã
+ç¹ã«
+ç§
+ç§é
+è²´æ¹
+è²´æ¹æ¹
\ No newline at end of file
diff --git a/static/stopwords/ko b/static/stopwords/ko
new file mode 100644
index 0000000..4465f0f
--- /dev/null
+++ b/static/stopwords/ko
@@ -0,0 +1,679 @@
+!
+"
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+...
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+;
+<
+=
+>
+?
+@
+\
+^
+_
+`
+|
+~
+Â·
+â
+ââ
+â
+â
+â
+â
+â¦
+ã
+ã
+ã
+ã
+ã
+ã
+ê°
+ê°ê¹ì¤ë¡
+ê°ë ¹
+ê°
+ê°ê°
+ê°ì
+ê°ì¢
+ê°ê³ ë§íìë©´
+ê°ë¤
+ê°ì´
+ê°ìì¹ìê³ 
+ê±°ëì
+ê±°ë°
+ê±°ì
+ê²
+ê²ê³¼ ê°ì´
+ê²ë¤
+ê²ë¤ê°
+ê²ì°ë¤
+ê²¨ì°
+ê²¬ì§ìì
+ê²°ê³¼ì ì´ë¥´ë¤
+ê²°êµ­
+ê²°ë¡ ì ë¼ ì ìë¤
+ê²¸ì¬ê²¸ì¬
+ê³ ë ¤íë©´
+ê³ ë¡
+ê³§
+ê³µëì¼ë¡
+ê³¼
+ê³¼ì°
+ê´ê³ê° ìë¤
+ê´ê³ìì´
+ê´ë ¨ì´ ìë¤
+ê´íì¬
+ê´í
+ê´í´ìë
+êµ¬
+êµ¬ì²´ì ì¼ë¡
+êµ¬í íë¤
+ê·¸
+ê·¸ë¤
+ê·¸ë
+ê·¸ë
+ê·¸ëë
+ê·¸ëì
+ê·¸ë¬ë
+ê·¸ë¬ë
+ê·¸ë¬ëê¹
+ê·¸ë¬ë©´
+ê·¸ë¬ë¯ë¡
+ê·¸ë¬íì¦
+ê·¸ë° ê¹ë­ì
+ê·¸ë°ë°
+ê·¸ë°ì¦
+ê·¸ë¼
+ê·¸ë¼ìë ë¶êµ¬íê³ 
+ê·¸ë ê² í¨ì¼ë¡ì¨
+ê·¸ë ì§
+ê·¸ë ì§ ìë¤ë©´
+ê·¸ë ì§ ìì¼ë©´
+ê·¸ë ì§ë§
+ê·¸ë ì§ìì¼ë©´
+ê·¸ë¦¬ê³ 
+ê·¸ë¦¬íì¬
+ê·¸ë§ì´ë¤
+ê·¸ì ë°ë¥´ë
+ê·¸ìì
+ê·¸ì 
+ê·¸ì¤ìì
+ê·¸ì¹ì§ ìë¤
+ê·¼ê±°ë¡
+ê·¼ê±°íì¬
+ê¸°ëì¬
+ê¸°ì ì¼ë¡
+ê¸°ì¤ì¼ë¡
+ê¸°í
+ê¹ë­ì¼ë¡
+ê¹ì
+ê¹ì§
+ê¹ì§ ë¯¸ì¹ë¤
+ê¹ì§ë
+ê½ë¹
+ëë
+ë¼ìµ
+ë
+ëë¨¸ì§ë
+ë¨ë¤
+ë¨ì§
+ë
+ëí¬
+ëí¬ë¤
+ë¤
+ë·
+ë
+ë¼íì§ ìë¤
+ëë¼ë¤
+ëê° ìê² ëê°
+ëêµ¬
+ë¤ë¥¸
+ë¤ë¥¸ ë°©ë©´ì¼ë¡
+ë¤ë§
+ë¤ì¯
+ë¤ì
+ë¤ì
+ë¤ì ë§íìë©´
+ë¤ìë§íë©´
+ë¤ì
+ë¤ìì
+ë¤ìì¼ë¡
+ë¨ì§
+ëµë¤
+ë¹ì 
+ë¹ì¥
+ëë¡ íë¤
+ëíë©´
+ëíì¬
+ëí´ ë§íìë©´
+ëí´ì
+ëê·¸
+ëêµ¬ë
+ëêµ°ë¤ë
+ëë¼ë
+ëë¶ì´
+ëì±ë
+ëì±ì´ë
+ëë¬íë¤
+ëì°©íë¤
+ëìì
+ëì
+ëë°ìì¼
+ëì´ì
+ëë²ì§¸ë¡
+ë
+ë¥ë¥
+ë¤ë°ë¼
+ë¤ì´ì´
+ë ê°ì
+ë¤
+ë±
+ë±ë±
+ë©ë
+ë°ë¼
+ë°ë¼ì
+ë°ì
+ë°ì§ì§ ìë¤
+ë±
+ë
+ëê° ëì´
+ëë¬¸ì
+ë
+ëí
+ëë
+ë¼ í´ë
+ë ¹
+ë¡
+ë¡ ì¸íì¬
+ë¡ë¶í°
+ë¡ì¨
+ë¥
+ë¥¼
+ë§ìëë¡
+ë§ì 
+ë§ì ë
+ë§ì¹
+ë§ë¡ íê³ 
+ë§ ëª»íë¤
+ë§ì½
+ë§ì½ì
+ë§ì ìëë¤
+ë§ì´ ìëë¤
+ë§ì¼
+ë§í¼
+ë§íìë©´
+ë§í ê²ë ìê³ 
+ë§¤
+ë§¤ë²
+ë©ì°ê²ë¤
+ëª
+ëª¨
+ëª¨ë
+ë¬´ë µ
+ë¬´ë¦ì°ê³ 
+ë¬´ì¨
+ë¬´ì
+ë¬´ìëë¬¸ì
+ë¬¼ë¡ 
+ë°
+ë°ê¾¸ì´ë§íë©´
+ë°ê¾¸ì´ë§íìë©´
+ë°ê¾¸ì´ì ë§íë©´
+ë°ê¾¸ì´ì íë¤ë©´
+ë°ê¿ ë§íë©´
+ë°ë¡
+ë°ìê°ì´
+ë°ì ìëë¤
+ë°ëë¡
+ë°ëë¡ ë§íìë©´
+ë°ëì
+ë²ê¸
+ë³´ëë°ì
+ë³´ë¤ë
+ë³´ëë
+ë³¸ëë¡
+ë´
+ë´ë¼
+ë¶ë¥ì ì¬ëë¤
+ë¶í°
+ë¶êµ¬íê³ 
+ë¶ë¬¸íê³ 
+ë¶ë¶
+ë¹ê±±ê±°ë¦¬ë¤
+ë¹êµì 
+ë¹ê¸¸ì ìë¤
+ë¹ë¡ì
+ë¹ë¡
+ë¹ì·íë¤
+ë¹ì¶ì´ ë³´ì
+ë¹íë©´
+ë¿ë§ ìëë¼
+ë¿ë§ìëë¼
+ë¿ì´ë¤
+ìê±±
+ìê±±ê±°ë¦¬ë¤
+ì¬
+ì¼
+ìëì ì¼ë¡ ë§íìë©´
+ìê°íëë¡
+ì¤ë ¹
+ì¤ë§
+ì¤ì¬
+ì
+ìì
+ìì¸
+ì¨
+ì¿
+ìµëê¹
+ìµëë¤
+ìê°
+ìê°
+ììíì¬
+ìì´ì
+ìí¤ë¤
+ì¤ë¡
+ì¬ì§ì´
+ì
+ìë
+ìëëë¤ë¥¼ê°
+ìëë¼ë©´
+ìëë©´
+ìëìë¤ë©´
+ìëì
+ìë¬´ê±°ë
+ìë¬´ë
+ìì¼
+ìì¸ë¬
+ìì´
+ìì´ê³ 
+ìì´êµ¬
+ìì´ì¼
+ìì´ì¿ 
+ìí
+ìí
+ì ê·¸ë¬ë©´
+ìê¸° ìíì¬
+ìê¸° ìí´ì
+ì ì ìë¤
+ììì´
+ì
+ììì
+ììê²
+ì¼
+ì½ê°
+ìì
+ì´
+ì´ê¸°ì¬ì°¨
+ì´ë
+ì´ë ëë
+ì´ëê²
+ì´ëê³³
+ì´ëë
+ì´ëìª½
+ì´ëí´
+ì´ë
+ì´ë
+ì´ë í
+ì´ë¤
+ì´ë¤ê²
+ì´ë¤ê²ë¤
+ì´ë»ê²
+ì´ë»í´
+ì´ì´
+ì´ì§¸ì
+ì´ì¨ë 
+ì´ì©ì ìë¤
+ì´ì°
+ì´ì°ëë 
+ì´ì°ëì´
+ì´ì°íë ì§
+ì´ì°íì¬
+ì¸ì 
+ì¸ì  ê°
+ì¼ë§
+ì¼ë§ ì ëë ê²
+ì¼ë§ê°
+ì¼ë§ë
+ì¼ë§ë ì§
+ì¼ë§ë§í¼
+ì¼ë§í¼
+ìì
+ì
+ì ê°ì
+ì ë¬ë ¤ ìë¤
+ì ëí´
+ì ìë¤
+ì ííë¤
+ìê²
+ìì
+ì¬
+ì¬ê¸°
+ì¬ë
+ì¬ë¬ë¶
+ì¬ë³´ìì¤
+ì¬ë¶
+ì¬ì¯
+ì¬ì í
+ì¬ì°¨
+ì°ê´ëë¤
+ì°ì´ì
+ì
+ìì°¨
+ìì¬ë
+ì
+ìë¥¼ ë¤ë©´
+ìë¥¼ ë¤ìë©´
+ìì»¨ë
+ìíë©´
+ì¤
+ì¤ë¡ì§
+ì¤ë¥´ë¤
+ì¤ìë§ì
+ì¤ì§
+ì¤í¸
+ì¤íë ¤
+ì
+ì ê°ì ì¬ëë¤
+ìë¥´ë¥´
+ìì
+ì
+ìëíë©´
+ì¸ìë
+ìë§í¼
+ìë§í ê²
+ìë§íê±¸
+ìì»¨ë
+ì°ë¥´ë¥´
+ì°ë¦¬
+ì°ë¦¬ë¤
+ì°ì 
+ì°ì ì¢í©íê²ê³¼ê°ì´
+ì´ì´
+ì
+ììì ìì íë°ìê°ì´
+ìíì¬
+ìí´ì
+ìì
+ì¡
+ì¼ë¡
+ì¼ë¡ ì¸íì¬
+ì¼ë¡ì
+ì¼ë¡ì¨
+ì
+ì
+ìë¹
+ì
+ìê±°íì¬
+ìì§íì¬
+ìí´
+ìí´ëë¤
+ìí´ì
+ì´
+ì´ ëë¤
+ì´ ëë¬¸ì
+ì´ ë°ì
+ì´ ì¸ì
+ì´ ì ëì
+ì´ê²
+ì´ê³³
+ì´ë
+ì´ë¼ë©´
+ì´ë
+ì´ë¬ì´ë¬íë¤
+ì´ë¬í
+ì´ë°
+ì´ë´ì ëë¡
+ì´ë ê² ë§ì ê²
+ì´ë ê²ëë©´
+ì´ë ê²ë§íìë©´
+ì´ë êµ¬ë
+ì´ë¡ ì¸íì¬
+ì´ë¥´ê¸°ê¹ì§
+ì´ë¦¬íì¬
+ì´ë§í¼
+ì´ë²
+ì´ë´
+ì´ì
+ì´ì´ì
+ì´ìë¤
+ì´ì ê°ë¤
+ì´ì ê°ì
+ì´ì ë°ëë¡
+ì´ìê°ë¤ë©´
+ì´ì¸ìë
+ì´ì©íì¬
+ì´ì ë§ì¼ë¡
+ì´ì  
+ì´ì§ë§
+ì´ìª½
+ì´ì²êµ¬
+ì´ì²ì¡
+ì´ì²ì¹ 
+ì´ì²í
+ì¸ ë¯íë¤
+ì¸ì  
+ì¼
+ì¼ê²ì´ë¤
+ì¼ê³±
+ì¼ë¨
+ì¼ë
+ì¼ë°ì ì¼ë¡
+ì¼ì§ë¼ë
+ìì íë¦¼ìë¤
+ìê°íì¬
+ìì¥ìì
+ìë°ë¼
+ìë¤
+ì
+ìê¸°
+ìê¸°ì§
+ìë§ì
+ìì 
+ì ê¹
+ì ì
+ì 
+ì ê²
+ì ê²ë§í¼
+ì ê¸°
+ì ìª½
+ì í¬
+ì ë¶
+ì ì
+ì í
+ì ìì ë³´ì
+ì ëì ì´ë¥´ë¤
+ì 
+ì ê°ê¸°
+ì ì¸íê³ 
+ì¡°ê¸
+ì¡°ì°¨
+ì¡°ì°¨ë
+ì¡¸ì¡¸
+ì¢
+ì¢ì
+ì¢ì¢
+ì£¼ë£©ì£¼ë£©
+ì£¼ì íì§ ìê³ 
+ì¤ì ëª°ëë¤
+ì¤ìëª¨ë¥¸ë¤
+ì¤ìì
+ì¤ìíë
+ì¦ìíì¬
+ì¦
+ì¦ì
+ì§ë ì§
+ì§ë§
+ì§ë§ê³ 
+ì§ì§ë¡
+ìª½ì¼ë¡
+ì°¨ë¼ë¦¬
+ì°¸
+ì°¸ë
+ì²«ë²ì§¸ë¡
+ì³
+ì´ì ì¼ë¡
+ì´ì ì¼ë¡ ë§íë©´
+ì´ì ì¼ë¡ ë³´ë©´
+ì¹ 
+ì½¸ì½¸
+ì¾ì¾
+ì¿µ
+íë¤
+íì¸
+íí
+í íë¤
+íµíì¬
+í­
+í¤
+íí
+í
+í
+í½
+íë 
+í
+íê²ë ê²ì´ë¤
+íê²íë¤
+íê² ëê°
+íê³  ìë¤
+íê³ ììë¤
+íê³¤íìë¤
+íêµ¬ë
+íê¸° ëë¬¸ì
+íê¸° ìíì¬
+íê¸°ëíë°
+íê¸°ë§ íë©´
+íê¸°ë³´ë¤ë
+íê¸°ì
+íë
+íëë
+íë ê¹ì
+íë í¸ì´ ë«ë¤
+íëê²ë
+íëê²ë§ ëª»íë¤
+íëê²ì´ ë«ë¤
+íëë°
+íëë¼ë
+íëë¤
+íëë¡ìí¤ë¤
+íëë¡íë¤
+íë ì§
+íë ¤ê³ íë¤
+íë§í°ë©´
+íë©´ í ìë¡
+íë©´ëë¤
+íë©´ì
+íë¬¼ë©°
+íì¬ê¸
+íì¬ì¼
+íìë§ì
+íì§ ìëë¤ë©´
+íì§ ìëë¡
+íì§ë§
+íì§ë§ë¼
+íì§ë§
+íí
+í ê¹ë­ì
+í ì´ì ë
+í í
+íë¤ë©´
+íë¤ë©´ ëª°ë¼ë
+íë°
+íë§ë
+íì ì´ìë¤
+íì¼ ì¼ë¡ë
+íí­ëª©
+í  ë°ë¦ì´ë¤
+í  ìê°ì´ë¤
+í  ì¤ ìë¤
+í  ì§ê²½ì´ë¤
+í  íì´ ìë¤
+í ë
+í ë§íë¤
+í ë§ì 
+í ë¿
+í ììë¤
+í ììì´
+í ì¤ìë¤
+í ì§ë¼ë
+í ì§ì¸ì 
+í¨ê»
+í´ëëë¤
+í´ëì¢ë¤
+í´ë´ì
+í´ìë ìëë¤
+í´ì¼íë¤
+í´ì
+íì´ì
+í¥íë¤
+í¥íì¬
+í¥í´ì
+í
+íê±±
+íí
+í
+íí
+íë¡íë¡
+íìì¼ë¡ ì°ì¬
+í¹ì
+í¹ì
+í¼ì
+í¨ì¬
+íìµ
+í´
+íí
+í¥
+íìì´
+ï¸¿
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼ 
+ï¼»
+ï¼½
+ï½
+ï½
+ï½
+ï½
+ï¿¥
\ No newline at end of file
diff --git a/static/stopwords/ku b/static/stopwords/ku
new file mode 100644
index 0000000..7b12cfe
--- /dev/null
+++ b/static/stopwords/ku
@@ -0,0 +1,62 @@
+Ø¦ÛÙÛ
+Ø¦ÛÙÛ
+Ø¦ÛÙ
+Ø¦ÛÙ
+Ø¦ÛÙØ§Ù
+Ø¦ÛÙÛÛ
+Ø¨Û
+Ø¨Û
+Ø¨ÛØ¬Ú¯Û
+Ø¨Û
+Ø¨ÛØ¨Û
+Ø¨ÛØ¯ÛÙ
+Ø¨ÛØ±Ø¯ÛÙ
+Ø¨ÛØ±ÙÛ
+Ø¨ÛØ±ÛÙÛ
+Ø¨ÛØ±ÛÙÛ
+Ø¨ÛÙØ§Û
+Ø¨ÛÙ¾ÛÛ
+ØªÛ
+ØªÛ
+Ø¬Ú¯Û
+Ø¯ÙØ§Û
+Ø¯ÙÙ
+Ø¯Û
+Ø¯ÛÚ©Ø§Øª
+Ø¯ÛÚ¯ÛÚµ
+Ø³ÛØ±
+ÙÛ
+ÙÛ
+ÙÛØ¨Ø§Ø¨ÛØª
+ÙÛØ¨Ø§ØªÛ
+ÙÛØ¨Ø§Ø±ÛÛ
+ÙÛØ¨Ø±ÛØªÛ
+ÙÛØ¨Ù
+ÙÛØ¨ÛØ±
+ÙÛØ¨ÛÛÙÛ
+ÙÛØ¯ÛÙ
+ÙÛØ±Û
+ÙÛØ±ÛÚ¯Ø§
+ÙÛØ±ÛÙÛ
+ÙÛØ³ÛØ±
+ÙÛÙØ§ÛÛÙ
+ÙÛÙØ§Ù
+ÙÛÙÛÙ
+ÙÛÙ
+ÙÛÙ¾ÛÙØ§ÙÛ
+ÙÛÚÛØ±
+ÙÛÚ¯ÛÚµ
+ÙÙ
+ÙØ§Ù
+ÙÛÙØ§Ù
+ÙÛØ±
+ÙÛØ±ÙÛÙØ§
+Ù
+ÙÛÚ©
+Ù¾Ø§Ø´
+Ù¾Û
+Ù¾ÛØ´
+ÚÛÙØ¯
+Ú©Ø±Ø¯
+Ú©Û
+Û
\ No newline at end of file
diff --git a/static/stopwords/la b/static/stopwords/la
new file mode 100644
index 0000000..00c5759
--- /dev/null
+++ b/static/stopwords/la
@@ -0,0 +1,49 @@
+a
+ab
+ac
+ad
+at
+atque
+aut
+autem
+cum
+de
+dum
+e
+erant
+erat
+est
+et
+etiam
+ex
+haec
+hic
+hoc
+in
+ita
+me
+nec
+neque
+non
+per
+qua
+quae
+quam
+qui
+quibus
+quidem
+quo
+quod
+re
+rebus
+rem
+res
+sed
+si
+sic
+sunt
+tamen
+tandem
+te
+ut
+vel
\ No newline at end of file
diff --git a/static/stopwords/lt b/static/stopwords/lt
new file mode 100644
index 0000000..6b5242a
--- /dev/null
+++ b/static/stopwords/lt
@@ -0,0 +1,474 @@
+abi
+abidvi
+abiejose
+abiejuose
+abiejÃ¸
+abiem
+abigaliai
+abipus
+abu
+abudu
+ai
+ana
+anaiptol
+anaisiais
+anajai
+anajam
+anajame
+anapus
+anas
+anasai
+anasis
+anei
+aniedvi
+anieji
+aniesiems
+anoji
+anojo
+anojoje
+anokia
+anoks
+anosiomis
+anosioms
+anosios
+anosiose
+anot
+ant
+antai
+anuodu
+anuoju
+anuosiuose
+anuosius
+anÃ ja
+anÃ jÃ 
+anÃ jÃ¡
+anÃ sias
+anÃ¸jÃ¸
+apie
+aplink
+ar
+arba
+argi
+arti
+aukÃ°Ã¨iau
+aÃ°
+be
+bei
+beje
+bemaÃ¾
+bent
+bet
+betgi
+beveik
+dar
+dargi
+daugmaÃ¾
+deja
+dÃ«ka
+dÃ«l
+dÃ«lei
+dÃ«lto
+ech
+et
+gal
+galbÃ»t
+galgi
+gan
+gana
+gi
+greta
+idant
+iki
+ir
+irgi
+it
+itin
+iÃ°
+iÃ°ilgai
+iÃ°vis
+jaisiais
+jajai
+jajam
+jajame
+jei
+jeigu
+ji
+jiedu
+jiedvi
+jieji
+jiesiems
+jinai
+jis
+jisai
+jog
+joji
+jojo
+jojoje
+jokia
+joks
+josiomis
+josioms
+josios
+josiose
+judu
+judvi
+juk
+jumis
+jums
+jumyse
+juodu
+juoju
+juosiuose
+juosius
+jus
+jÃ ja
+jÃ jÃ 
+jÃ sias
+jÃ¡jÃ¡
+jÃ¸jÃ¸
+jÃ»s
+jÃ»siÃ°kis
+jÃ»siÃ°kÃ«
+jÃ»sÃ¸
+kad
+kada
+kadangi
+kai
+kaip
+kaipgi
+kas
+katra
+katras
+katriedvi
+katruodu
+kaÃ¾in
+kaÃ¾kas
+kaÃ¾katra
+kaÃ¾katras
+kaÃ¾kokia
+kaÃ¾koks
+kaÃ¾kuri
+kaÃ¾kuris
+kiaurai
+kiek
+kiekvienas
+kieno
+kita
+kitas
+kitokia
+kitoks
+kodÃ«l
+kokia
+koks
+kol
+kolei
+kone
+kuomet
+kur
+kurgi
+kuri
+kuriedvi
+kuris
+kuriuodu
+lai
+lig
+ligi
+link
+lyg
+man
+manaisiais
+manajai
+manajam
+manajame
+manas
+manasai
+manasis
+mane
+manieji
+maniesiems
+manim
+manimi
+maniÃ°kis
+maniÃ°kÃ«
+mano
+manoji
+manojo
+manojoje
+manosiomis
+manosioms
+manosios
+manosiose
+manuoju
+manuosiuose
+manuosius
+manyje
+manÃ ja
+manÃ jÃ 
+manÃ jÃ¡
+manÃ sias
+manÃ¦s
+manÃ¸jÃ¸
+mat
+maÃ¾daug
+maÃ¾ne
+mes
+mudu
+mudvi
+mumis
+mums
+mumyse
+mus
+mÃ»siÃ°kis
+mÃ»siÃ°kÃ«
+mÃ»sÃ¸
+na
+nagi
+ne
+nebe
+nebent
+negi
+negu
+nei
+nejau
+nejaugi
+nekaip
+nelyginant
+nes
+net
+netgi
+netoli
+neva
+nors
+nuo
+nÃ«
+o
+ogi
+oi
+paeiliui
+pagal
+pakeliui
+palaipsniui
+palei
+pas
+pasak
+paskos
+paskui
+paskum
+pat
+pati
+patiems
+paties
+pats
+patys
+patÃ¡
+paÃ¨iais
+paÃ¨iam
+paÃ¨iame
+paÃ¨iu
+paÃ¨iuose
+paÃ¨ius
+paÃ¨iÃ¸
+per
+pernelyg
+pirm
+pirma
+pirmiau
+po
+prie
+prieÃ°
+prieÃ°ais
+pro
+pusiau
+rasi
+rodos
+sau
+savaisiais
+savajai
+savajam
+savajame
+savas
+savasai
+savasis
+save
+savieji
+saviesiems
+savimi
+saviÃ°kis
+saviÃ°kÃ«
+savo
+savoji
+savojo
+savojoje
+savosiomis
+savosioms
+savosios
+savosiose
+savuoju
+savuosiuose
+savuosius
+savyje
+savÃ ja
+savÃ jÃ 
+savÃ jÃ¡
+savÃ sias
+savÃ¦s
+savÃ¸jÃ¸
+skersai
+skradÃ¾iai
+staÃ¨iai
+su
+sulig
+ta
+tad
+tai
+taigi
+taip
+taipogi
+taisiais
+tajai
+tajam
+tajame
+tamsta
+tarp
+tarsi
+tartum
+tarytum
+tas
+tasai
+tau
+tavaisiais
+tavajai
+tavajam
+tavajame
+tavas
+tavasai
+tavasis
+tave
+tavieji
+taviesiems
+tavimi
+taviÃ°kis
+taviÃ°kÃ«
+tavo
+tavoji
+tavojo
+tavojoje
+tavosiomis
+tavosioms
+tavosios
+tavosiose
+tavuoju
+tavuosiuose
+tavuosius
+tavyje
+tavÃ ja
+tavÃ jÃ 
+tavÃ jÃ¡
+tavÃ sias
+tavÃ¦s
+tavÃ¸jÃ¸
+taÃ¨iau
+te
+tegu
+tegul
+tiedvi
+tieji
+ties
+tiesiems
+tiesiog
+tik
+tikriausiai
+tiktai
+toji
+tojo
+tojoje
+tokia
+toks
+tol
+tolei
+toliau
+tosiomis
+tosioms
+tosios
+tosiose
+tu
+tuodu
+tuoju
+tuosiuose
+tuosius
+turbÃ»t
+tÃ ja
+tÃ jÃ 
+tÃ jÃ¡
+tÃ sias
+tÃ¸jÃ¸
+tÃ»las
+uÃ¾
+uÃ¾tat
+uÃ¾vis
+va
+vai
+viduj
+vidury
+vien
+vienas
+vienokia
+vienoks
+vietoj
+virÃ°
+virÃ°uj
+virÃ°um
+vis
+vis dÃ«lto
+visa
+visas
+visgi
+visokia
+visoks
+vos
+vÃ«l
+vÃ«lgi
+ypaÃ¨
+Ã¡
+Ã¡kypai
+Ã¡striÃ¾ai
+Ã°alia
+Ã°e
+Ã°i
+Ã°iaisiais
+Ã°iajai
+Ã°iajam
+Ã°iajame
+Ã°iapus
+Ã°iedvi
+Ã°ieji
+Ã°iesiems
+Ã°ioji
+Ã°iojo
+Ã°iojoje
+Ã°iokia
+Ã°ioks
+Ã°iosiomis
+Ã°iosioms
+Ã°iosios
+Ã°iosiose
+Ã°is
+Ã°isai
+Ã°it
+Ã°ita
+Ã°itas
+Ã°itiedvi
+Ã°itokia
+Ã°itoks
+Ã°ituodu
+Ã°iuodu
+Ã°iuoju
+Ã°iuosiuose
+Ã°iuosius
+Ã°iÃ ja
+Ã°iÃ jÃ 
+Ã°iÃ sias
+Ã°iÃ¸jÃ¸
+Ã°tai
+Ã°Ã¡jÃ¡
+Ã¾emiau
\ No newline at end of file
diff --git a/static/stopwords/lv b/static/stopwords/lv
new file mode 100644
index 0000000..71fb149
--- /dev/null
+++ b/static/stopwords/lv
@@ -0,0 +1,161 @@
+aiz
+ap
+apakÅ¡
+apakÅ¡pus
+ar
+arÄ«
+augÅ¡pus
+bet
+bez
+bija
+biji
+biju
+bijÄm
+bijÄt
+bÅ«s
+bÅ«si
+bÅ«siet
+bÅ«sim
+bÅ«t
+bÅ«Å¡u
+caur
+diemÅ¾Äl
+diezin
+droÅ¡i
+dÄÄ¼
+esam
+esat
+esi
+esmu
+gan
+gar
+iekam
+iekams
+iekÄm
+iekÄms
+iekÅ¡
+iekÅ¡pus
+ik
+ir
+it
+itin
+iz
+ja
+jau
+jeb
+jebÅ¡u
+jel
+jo
+jÄ
+ka
+kamÄr
+kaut
+kolÄ«dz
+kopÅ¡
+kÄ
+kÄ¼uva
+kÄ¼uvi
+kÄ¼uvu
+kÄ¼uvÄm
+kÄ¼uvÄt
+kÄ¼Å«s
+kÄ¼Å«si
+kÄ¼Å«siet
+kÄ¼Å«sim
+kÄ¼Å«st
+kÄ¼Å«stam
+kÄ¼Å«stat
+kÄ¼Å«sti
+kÄ¼Å«stu
+kÄ¼Å«t
+kÄ¼Å«Å¡u
+labad
+lai
+lejpus
+lÄ«dz
+lÄ«dzko
+ne
+nebÅ«t
+nedz
+nekÄ
+nevis
+nezin
+no
+nu
+nÄ
+otrpus
+pa
+par
+pat
+pie
+pirms
+pret
+priekÅ¡
+pÄr
+pÄc
+starp
+tad
+tak
+tapi
+taps
+tapsi
+tapsiet
+tapsim
+tapt
+tapÄt
+tapÅ¡u
+taÄu
+te
+tiec
+tiek
+tiekam
+tiekat
+tieku
+tik
+tika
+tikai
+tiki
+tikko
+tiklab
+tiklÄ«dz
+tiks
+tiksiet
+tiksim
+tikt
+tiku
+tikvien
+tikÄm
+tikÄt
+tikÅ¡u
+tomÄr
+topat
+turpretim
+turpretÄ«
+tÄ
+tÄdÄÄ¼
+tÄlab
+tÄpÄc
+un
+uz
+vai
+var
+varat
+varÄja
+varÄji
+varÄju
+varÄjÄm
+varÄjÄt
+varÄs
+varÄsi
+varÄsiet
+varÄsim
+varÄt
+varÄÅ¡u
+vien
+virs
+virspus
+vis
+viÅpus
+zem
+Ärpus
+Å¡aipus
\ No newline at end of file
diff --git a/static/stopwords/mr b/static/stopwords/mr
new file mode 100644
index 0000000..2034713
--- /dev/null
+++ b/static/stopwords/mr
@@ -0,0 +1,99 @@
+à¤à¤§à¤¿à¤
+à¤à¤¨à¥à¤
+à¤à¤¶à¥
+à¤à¤¸à¤²à¤¯à¤¾à¤à¥
+à¤à¤¸à¤²à¥à¤²à¥à¤¯à¤¾
+à¤à¤¸à¤¾
+à¤à¤¸à¥à¤¨
+à¤à¤¸à¥
+à¤à¤
+à¤à¤£à¤¿
+à¤à¤¤à¤¾
+à¤à¤ªà¤²à¥à¤¯à¤¾
+à¤à¤²à¤¾
+à¤à¤²à¥
+à¤à¤²à¥
+à¤à¤¹à¥
+à¤à¤¹à¥à¤¤
+à¤à¤
+à¤à¤à¤¾
+à¤à¤®à¥
+à¤à¤°à¤£à¤¯à¤¾à¤¤
+à¤à¤°à¥à¤¨
+à¤à¤¾
+à¤à¤¾à¤®
+à¤à¤¾à¤¯
+à¤à¤¾à¤¹à¥
+à¤à¤¿à¤µà¤¾
+à¤à¥
+à¤à¥à¤²à¤¾
+à¤à¥à¤²à¥
+à¤à¥à¤²à¥
+à¤à¥à¤à¥
+à¤à¥à¤²à¥à¤¯à¤¾
+à¤à¥à¤à¤¨
+à¤à¤¾à¤¤
+à¤à¤¾à¤²à¤¾
+à¤à¤¾à¤²à¥
+à¤à¤¾à¤²à¥
+à¤à¤¾à¤²à¥à¤²à¥à¤¯à¤¾
+à¤à¤¾
+à¤¡à¥
+à¤¤à¤°
+à¤¤à¤°à¥
+à¤¤à¤¸à¥à¤
+à¤¤à¤¾
+à¤¤à¥
+à¤¤à¥à¤¨
+à¤¤à¥
+à¤¤à¥
+à¤¤à¥à¤¯à¤¾
+à¤¤à¥à¤¯à¤¾à¤à¤¾
+à¤¤à¥à¤¯à¤¾à¤à¥
+à¤¤à¥à¤¯à¤¾à¤à¥à¤¯à¤¾
+à¤¤à¥à¤¯à¤¾à¤¨à¤¾
+à¤¤à¥à¤¯à¤¾à¤¨à¥
+à¤¤à¥à¤¯à¤¾à¤®à¥à¤³à¥
+à¤¤à¥à¤°à¥
+à¤¦à¤¿à¤²à¥
+à¤¦à¥à¤¨
+à¤¨
+à¤¨à¤¾à¤¹à¥
+à¤¨à¤¿à¤°à¥à¤£à¥à¤¯
+à¤ªà¤£
+à¤ªà¤®
+à¤ªà¤°à¤¯à¤¤à¤¨
+à¤ªà¤¾à¤à¥à¤²
+à¤®
+à¤®à¤¾à¤¤à¥à¤°
+à¤®à¤¾à¤¹à¤¿à¤¤à¥
+à¤®à¥
+à¤®à¥à¤¬à¥
+à¤®à¥à¤¹à¤£à¤à¥
+à¤®à¥à¤¹à¤£à¤¾à¤²à¥
+à¤®à¥à¤¹à¤£à¥à¤¨
+à¤¯à¤¾
+à¤¯à¤¾à¤à¤¾
+à¤¯à¤¾à¤à¥
+à¤¯à¤¾à¤à¥à¤¯à¤¾
+à¤¯à¤¾à¤¨à¤¾
+à¤¯à¤¾à¤¨à¥
+à¤¯à¥à¤£à¤¾à¤°
+à¤¯à¥à¤¤
+à¤¯à¥à¤¥à¥à¤²
+à¤¯à¥à¤¥à¥
+à¤²à¤¾à¤
+à¤µ
+à¤µà¥à¤¯à¤à¤¤
+à¤¸à¤°à¥à¤µ
+à¤¸à¤¾à¤à¤¿à¤¤à¥à¤²à¥
+à¤¸à¥à¤°à¥
+à¤¹à¤à¤¾à¤°
+à¤¹à¤¾
+à¤¹à¥
+à¤¹à¥
+à¤¹à¥à¤£à¤¾à¤°
+à¤¹à¥à¤¤
+à¤¹à¥à¤¤à¤¾
+à¤¹à¥à¤¤à¥
+à¤¹à¥à¤¤à¥
\ No newline at end of file
diff --git a/static/stopwords/ms b/static/stopwords/ms
new file mode 100644
index 0000000..268a0b7
--- /dev/null
+++ b/static/stopwords/ms
@@ -0,0 +1,475 @@
+abdul
+abdullah
+acara
+ada
+adalah
+ahmad
+air
+akan
+akhbar
+akhir
+aktiviti
+alam
+amat
+amerika
+anak
+anggota
+antara
+antarabangsa
+apa
+apabila
+april
+as
+asas
+asean
+asia
+asing
+atas
+atau
+australia
+awal
+awam
+bagaimanapun
+bagi
+bahagian
+bahan
+baharu
+bahawa
+baik
+bandar
+bank
+banyak
+barangan
+baru
+baru-baru
+bawah
+beberapa
+bekas
+beliau
+belum
+berada
+berakhir
+berbanding
+berdasarkan
+berharap
+berikutan
+berjaya
+berjumlah
+berkaitan
+berkata
+berkenaan
+berlaku
+bermula
+bernama
+bernilai
+bersama
+berubah
+besar
+bhd
+bidang
+bilion
+bn
+boleh
+bukan
+bulan
+bursa
+cadangan
+china
+dagangan
+dalam
+dan
+dana
+dapat
+dari
+daripada
+dasar
+datang
+datuk
+demikian
+dengan
+depan
+derivatives
+dewan
+di
+diadakan
+dibuka
+dicatatkan
+dijangka
+diniagakan
+dis
+disember
+ditutup
+dolar
+dr
+dua
+dunia
+ekonomi
+eksekutif
+eksport
+empat
+enam
+faedah
+feb
+global
+hadapan
+hanya
+harga
+hari
+hasil
+hingga
+hubungan
+ia
+iaitu
+ialah
+indeks
+india
+indonesia
+industri
+ini
+islam
+isnin
+isu
+itu
+jabatan
+jalan
+jan
+jawatan
+jawatankuasa
+jepun
+jika
+jualan
+juga
+julai
+jumaat
+jumlah
+jun
+juta
+kadar
+kalangan
+kali
+kami
+kata
+katanya
+kaunter
+kawasan
+ke
+keadaan
+kecil
+kedua
+kedua-dua
+kedudukan
+kekal
+kementerian
+kemudahan
+kenaikan
+kenyataan
+kepada
+kepentingan
+keputusan
+kerajaan
+kerana
+kereta
+kerja
+kerjasama
+kes
+keselamatan
+keseluruhan
+kesihatan
+ketika
+ketua
+keuntungan
+kewangan
+khamis
+kini
+kira-kira
+kita
+klci
+klibor
+komposit
+kontrak
+kos
+kuala
+kuasa
+kukuh
+kumpulan
+lagi
+lain
+langkah
+laporan
+lebih
+lepas
+lima
+lot
+luar
+lumpur
+mac
+mahkamah
+mahu
+majlis
+makanan
+maklumat
+malam
+malaysia
+mana
+manakala
+masa
+masalah
+masih
+masing-masing
+masyarakat
+mata
+media
+mei
+melalui
+melihat
+memandangkan
+memastikan
+membantu
+membawa
+memberi
+memberikan
+membolehkan
+membuat
+mempunyai
+menambah
+menarik
+menawarkan
+mencapai
+mencatatkan
+mendapat
+mendapatkan
+menerima
+menerusi
+mengadakan
+mengambil
+mengenai
+menggalakkan
+menggunakan
+mengikut
+mengumumkan
+mengurangkan
+meningkat
+meningkatkan
+menjadi
+menjelang
+menokok
+menteri
+menunjukkan
+menurut
+menyaksikan
+menyediakan
+mereka
+merosot
+merupakan
+mesyuarat
+minat
+minggu
+minyak
+modal
+mohd
+mudah
+mungkin
+naik
+najib
+nasional
+negara
+negara-negara
+negeri
+niaga
+nilai
+nov
+ogos
+okt
+oleh
+operasi
+orang
+pada
+pagi
+paling
+pameran
+papan
+para
+paras
+parlimen
+parti
+pasaran
+pasukan
+pegawai
+pejabat
+pekerja
+pelabur
+pelaburan
+pelancongan
+pelanggan
+pelbagai
+peluang
+pembangunan
+pemberita
+pembinaan
+pemimpin
+pendapatan
+pendidikan
+penduduk
+penerbangan
+pengarah
+pengeluaran
+pengerusi
+pengguna
+pengurusan
+peniaga
+peningkatan
+penting
+peratus
+perdagangan
+perdana
+peringkat
+perjanjian
+perkara
+perkhidmatan
+perladangan
+perlu
+permintaan
+perniagaan
+persekutuan
+persidangan
+pertama
+pertubuhan
+pertumbuhan
+perusahaan
+peserta
+petang
+pihak
+pilihan
+pinjaman
+polis
+politik
+presiden
+prestasi
+produk
+program
+projek
+proses
+proton
+pukul
+pula
+pusat
+rabu
+rakan
+rakyat
+ramai
+rantau
+raya
+rendah
+ringgit
+rumah
+sabah
+sahaja
+saham
+sama
+sarawak
+satu
+sawit
+saya
+sdn
+sebagai
+sebahagian
+sebanyak
+sebarang
+sebelum
+sebelumnya
+sebuah
+secara
+sedang
+segi
+sehingga
+sejak
+sekarang
+sektor
+sekuriti
+selain
+selama
+selasa
+selatan
+selepas
+seluruh
+semakin
+semalam
+semasa
+sementara
+semua
+semula
+sen
+sendiri
+seorang
+sepanjang
+seperti
+sept
+september
+serantau
+seri
+serta
+sesi
+setiap
+setiausaha
+sidang
+singapura
+sini
+sistem
+sokongan
+sri
+sudah
+sukan
+suku
+sumber
+supaya
+susut
+syarikat
+syed
+tahap
+tahun
+tan
+tanah
+tanpa
+tawaran
+teknologi
+telah
+tempat
+tempatan
+tempoh
+tenaga
+tengah
+tentang
+terbaik
+terbang
+terbesar
+terbuka
+terdapat
+terhadap
+termasuk
+tersebut
+terus
+tetapi
+thailand
+tiada
+tidak
+tiga
+timbalan
+timur
+tindakan
+tinggi
+tun
+tunai
+turun
+turut
+umno
+unit
+untuk
+untung
+urus
+usaha
+utama
+walaupun
+wang
+wanita
+wilayah
+yang
\ No newline at end of file
diff --git a/static/stopwords/nl b/static/stopwords/nl
new file mode 100644
index 0000000..9c46fa3
--- /dev/null
+++ b/static/stopwords/nl
@@ -0,0 +1,413 @@
+aan
+aangaande
+aangezien
+achte
+achter
+achterna
+af
+afgelopen
+al
+aldaar
+aldus
+alhoewel
+alias
+alle
+allebei
+alleen
+alles
+als
+alsnog
+altijd
+altoos
+ander
+andere
+anders
+anderszins
+beetje
+behalve
+behoudens
+beide
+beiden
+ben
+beneden
+bent
+bepaald
+betreffende
+bij
+bijna
+bijv
+binnen
+binnenin
+blijkbaar
+blijken
+boven
+bovenal
+bovendien
+bovengenoemd
+bovenstaand
+bovenvermeld
+buiten
+bv
+daar
+daardoor
+daarheen
+daarin
+daarna
+daarnet
+daarom
+daarop
+daaruit
+daarvanlangs
+dan
+dat
+de
+deden
+deed
+der
+derde
+derhalve
+dertig
+deze
+dhr
+die
+dikwijls
+dit
+doch
+doe
+doen
+doet
+door
+doorgaand
+drie
+duizend
+dus
+echter
+een
+eens
+eer
+eerdat
+eerder
+eerlang
+eerst
+eerste
+eigen
+eigenlijk
+elk
+elke
+en
+enig
+enige
+enigszins
+enkel
+er
+erdoor
+erg
+ergens
+etc
+etcetera
+even
+eveneens
+evenwel
+gauw
+ge
+gedurende
+geen
+gehad
+gekund
+geleden
+gelijk
+gemoeten
+gemogen
+genoeg
+geweest
+gewoon
+gewoonweg
+haar
+haarzelf
+had
+hadden
+hare
+heb
+hebben
+hebt
+hedden
+heeft
+heel
+hem
+hemzelf
+hen
+het
+hetzelfde
+hier
+hierbeneden
+hierboven
+hierin
+hierna
+hierom
+hij
+hijzelf
+hoe
+hoewel
+honderd
+hun
+hunne
+ieder
+iedere
+iedereen
+iemand
+iets
+ik
+ikzelf
+in
+inderdaad
+inmiddels
+intussen
+inzake
+is
+ja
+je
+jezelf
+jij
+jijzelf
+jou
+jouw
+jouwe
+juist
+jullie
+kan
+klaar
+kon
+konden
+krachtens
+kun
+kunnen
+kunt
+laatst
+later
+liever
+lijken
+lijkt
+maak
+maakt
+maakte
+maakten
+maar
+mag
+maken
+me
+meer
+meest
+meestal
+men
+met
+mevr
+mezelf
+mij
+mijn
+mijnent
+mijner
+mijzelf
+minder
+miss
+misschien
+missen
+mits
+mocht
+mochten
+moest
+moesten
+moet
+moeten
+mogen
+mr
+mrs
+mw
+na
+naar
+nadat
+nam
+namelijk
+nee
+neem
+negen
+nemen
+nergens
+net
+niemand
+niet
+niets
+niks
+noch
+nochtans
+nog
+nogal
+nooit
+nu
+nv
+of
+ofschoon
+om
+omdat
+omhoog
+omlaag
+omstreeks
+omtrent
+omver
+ondanks
+onder
+ondertussen
+ongeveer
+ons
+onszelf
+onze
+onzeker
+ooit
+ook
+op
+opnieuw
+opzij
+over
+overal
+overeind
+overige
+overigens
+paar
+pas
+per
+precies
+recent
+redelijk
+reeds
+rond
+rondom
+samen
+sedert
+sinds
+sindsdien
+slechts
+sommige
+spoedig
+steeds
+tamelijk
+te
+tegen
+tegenover
+tenzij
+terwijl
+thans
+tien
+tiende
+tijdens
+tja
+toch
+toe
+toen
+toenmaals
+toenmalig
+tot
+totdat
+tussen
+twee
+tweede
+u
+uit
+uitgezonderd
+uw
+vaak
+vaakwat
+van
+vanaf
+vandaan
+vanuit
+vanwege
+veel
+veeleer
+veertig
+verder
+verscheidene
+verschillende
+vervolgens
+via
+vier
+vierde
+vijf
+vijfde
+vijftig
+vol
+volgend
+volgens
+voor
+vooraf
+vooral
+vooralsnog
+voorbij
+voordat
+voordezen
+voordien
+voorheen
+voorop
+voorts
+vooruit
+vrij
+vroeg
+waar
+waarom
+waarschijnlijk
+wanneer
+want
+waren
+was
+wat
+we
+wederom
+weer
+weg
+wegens
+weinig
+wel
+weldra
+welk
+welke
+werd
+werden
+werder
+wezen
+whatever
+wie
+wiens
+wier
+wij
+wijzelf
+wil
+wilden
+willen
+word
+worden
+wordt
+zal
+ze
+zei
+zeker
+zelf
+zelfde
+zelfs
+zes
+zeven
+zich
+zichzelf
+zij
+zijn
+zijne
+zijzelf
+zo
+zoals
+zodat
+zodra
+zonder
+zou
+zouden
+zowat
+zulk
+zulke
+zullen
+zult
\ No newline at end of file
diff --git a/static/stopwords/no b/static/stopwords/no
new file mode 100644
index 0000000..d36c367
--- /dev/null
+++ b/static/stopwords/no
@@ -0,0 +1,221 @@
+alle
+andre
+arbeid
+at
+av
+bare
+begge
+ble
+blei
+bli
+blir
+blitt
+bort
+bra
+bruke
+bÃ¥de
+bÃ¥e
+da
+de
+deg
+dei
+deim
+deira
+deires
+dem
+den
+denne
+der
+dere
+deres
+det
+dette
+di
+din
+disse
+ditt
+du
+dykk
+dykkar
+dÃ¥
+eg
+ein
+eit
+eitt
+eller
+elles
+en
+ene
+eneste
+enhver
+enn
+er
+et
+ett
+etter
+folk
+for
+fordi
+forsÃ»ke
+fra
+fÃ¥
+fÃ¸r
+fÃ»r
+fÃ»rst
+gjorde
+gjÃ»re
+god
+gÃ¥
+ha
+hadde
+han
+hans
+har
+hennar
+henne
+hennes
+her
+hjÃ¥
+ho
+hoe
+honom
+hoss
+hossen
+hun
+hva
+hvem
+hver
+hvilke
+hvilken
+hvis
+hvor
+hvordan
+hvorfor
+i
+ikke
+ikkje
+ingen
+ingi
+inkje
+inn
+innen
+inni
+ja
+jeg
+kan
+kom
+korleis
+korso
+kun
+kunne
+kva
+kvar
+kvarhelst
+kven
+kvi
+kvifor
+lage
+lang
+lik
+like
+makt
+man
+mange
+me
+med
+medan
+meg
+meget
+mellom
+men
+mens
+mer
+mest
+mi
+min
+mine
+mitt
+mot
+mye
+mykje
+mÃ¥
+mÃ¥te
+navn
+ned
+nei
+no
+noe
+noen
+noka
+noko
+nokon
+nokor
+nokre
+ny
+nÃ¥
+nÃ¥r
+og
+ogsÃ¥
+om
+opp
+oss
+over
+part
+punkt
+pÃ¥
+rett
+riktig
+samme
+sant
+seg
+selv
+si
+sia
+sidan
+siden
+sin
+sine
+sist
+sitt
+sjÃ¸l
+skal
+skulle
+slik
+slutt
+so
+som
+somme
+somt
+start
+stille
+sÃ¥
+sÃ¥nn
+tid
+til
+tilbake
+tilstand
+um
+under
+upp
+ut
+uten
+var
+vart
+varte
+ved
+verdi
+vere
+verte
+vi
+vil
+ville
+vite
+vore
+vors
+vort
+vÃ¥r
+vÃ¦re
+vÃ¦rt
+vÃ¶re
+vÃ¶rt
+Ã¥
\ No newline at end of file
diff --git a/static/stopwords/pl b/static/stopwords/pl
new file mode 100644
index 0000000..673f8a1
--- /dev/null
+++ b/static/stopwords/pl
@@ -0,0 +1,329 @@
+a
+aby
+ach
+acz
+aczkolwiek
+aj
+albo
+ale
+aleÅ¼
+ani
+aÅ¼
+bardziej
+bardzo
+bez
+bo
+bowiem
+by
+byli
+bym
+bynajmniej
+byÄ
+byÅ
+byÅa
+byÅo
+byÅy
+bÄdzie
+bÄdÄ
+cali
+caÅa
+caÅy
+chce
+choÄ
+ci
+ciebie
+ciÄ
+co
+cokolwiek
+coraz
+coÅ
+czasami
+czasem
+czemu
+czy
+czyli
+czÄsto
+daleko
+dla
+dlaczego
+dlatego
+do
+dobrze
+dokÄd
+doÅÄ
+dr
+duÅ¼o
+dwa
+dwaj
+dwie
+dwoje
+dzisiaj
+dziÅ
+gdy
+gdyby
+gdyÅ¼
+gdzie
+gdziekolwiek
+gdzieÅ
+go
+godz
+hab
+i
+ich
+ii
+iii
+ile
+im
+inna
+inne
+inny
+innych
+inÅ¼
+iv
+ix
+iÅ¼
+ja
+jak
+jakaÅ
+jakby
+jaki
+jakichÅ
+jakie
+jakiÅ
+jakiÅ¼
+jakkolwiek
+jako
+jakoÅ
+je
+jeden
+jedna
+jednak
+jednakÅ¼e
+jedno
+jednym
+jedynie
+jego
+jej
+jemu
+jest
+jestem
+jeszcze
+jeÅli
+jeÅ¼eli
+juÅ¼
+jÄ
+kaÅ¼dy
+kiedy
+kierunku
+kilka
+kilku
+kimÅ
+kto
+ktokolwiek
+ktoÅ
+ktÃ³ra
+ktÃ³re
+ktÃ³rego
+ktÃ³rej
+ktÃ³ry
+ktÃ³rych
+ktÃ³rym
+ktÃ³rzy
+ku
+lat
+lecz
+lub
+ma
+majÄ
+mam
+mamy
+maÅo
+mgr
+mi
+miaÅ
+mimo
+miÄdzy
+mnie
+mnÄ
+mogÄ
+moi
+moim
+moja
+moje
+moÅ¼e
+moÅ¼liwe
+moÅ¼na
+mu
+musi
+my
+mÃ³j
+na
+nad
+nam
+nami
+nas
+nasi
+nasz
+nasza
+nasze
+naszego
+naszych
+natomiast
+natychmiast
+nawet
+nic
+nich
+nie
+niech
+niego
+niej
+niemu
+nigdy
+nim
+nimi
+niÄ
+niÅ¼
+no
+nowe
+np
+nr
+o
+o.o.
+obok
+od
+ok
+okoÅo
+on
+ona
+one
+oni
+ono
+oraz
+oto
+owszem
+pan
+pana
+pani
+pl
+po
+pod
+podczas
+pomimo
+ponad
+poniewaÅ¼
+powinien
+powinna
+powinni
+powinno
+poza
+prawie
+prof
+przecieÅ¼
+przed
+przede
+przedtem
+przez
+przy
+raz
+razie
+roku
+rÃ³wnieÅ¼
+sam
+sama
+siÄ
+skÄd
+sobie
+sobÄ
+sposÃ³b
+swoje
+sÄ
+ta
+tak
+taka
+taki
+takich
+takie
+takÅ¼e
+tam
+te
+tego
+tej
+tel
+temu
+ten
+teraz
+teÅ¼
+to
+tobie
+tobÄ
+toteÅ¼
+totobÄ
+trzeba
+tu
+tutaj
+twoi
+twoim
+twoja
+twoje
+twym
+twÃ³j
+ty
+tych
+tylko
+tym
+tys
+tzw
+tÄ
+u
+ul
+vi
+vii
+viii
+vol
+w
+wam
+wami
+was
+wasi
+wasz
+wasza
+wasze
+we
+wedÅug
+wie
+wiele
+wielu
+wiÄc
+wiÄcej
+wszyscy
+wszystkich
+wszystkie
+wszystkim
+wszystko
+wtedy
+www
+wy
+wÅaÅnie
+wÅrÃ³d
+xi
+xii
+xiii
+xiv
+xv
+z
+za
+zapewne
+zawsze
+zaÅ
+ze
+zeznowu
+znowu
+znÃ³w
+zostaÅ
+zÅ
+Å¼aden
+Å¼adna
+Å¼adne
+Å¼adnych
+Å¼e
+Å¼eby
\ No newline at end of file
diff --git a/static/stopwords/pt b/static/stopwords/pt
new file mode 100644
index 0000000..53e4298
--- /dev/null
+++ b/static/stopwords/pt
@@ -0,0 +1,560 @@
+a
+acerca
+adeus
+agora
+ainda
+alem
+algmas
+algo
+algumas
+alguns
+ali
+alÃ©m
+ambas
+ambos
+ano
+anos
+antes
+ao
+aonde
+aos
+apenas
+apoio
+apontar
+apos
+apÃ³s
+aquela
+aquelas
+aquele
+aqueles
+aqui
+aquilo
+as
+assim
+atravÃ©s
+atrÃ¡s
+atÃ©
+aÃ­
+baixo
+bastante
+bem
+boa
+boas
+bom
+bons
+breve
+cada
+caminho
+catorze
+cedo
+cento
+certamente
+certeza
+cima
+cinco
+coisa
+com
+como
+comprido
+conhecido
+conselho
+contra
+contudo
+corrente
+cuja
+cujas
+cujo
+cujos
+custa
+cÃ¡
+da
+daquela
+daquelas
+daquele
+daqueles
+dar
+das
+de
+debaixo
+dela
+delas
+dele
+deles
+demais
+dentro
+depois
+desde
+desligado
+dessa
+dessas
+desse
+desses
+desta
+destas
+deste
+destes
+deve
+devem
+deverÃ¡
+dez
+dezanove
+dezasseis
+dezassete
+dezoito
+dia
+diante
+direita
+dispoe
+dispoem
+diversa
+diversas
+diversos
+diz
+dizem
+dizer
+do
+dois
+dos
+doze
+duas
+durante
+dÃ¡
+dÃ£o
+dÃºvida
+e
+ela
+elas
+ele
+eles
+em
+embora
+enquanto
+entao
+entre
+entÃ£o
+era
+eram
+essa
+essas
+esse
+esses
+esta
+estado
+estamos
+estar
+estarÃ¡
+estas
+estava
+estavam
+este
+esteja
+estejam
+estejamos
+estes
+esteve
+estive
+estivemos
+estiver
+estivera
+estiveram
+estiverem
+estivermos
+estivesse
+estivessem
+estiveste
+estivestes
+estivÃ©ramos
+estivÃ©ssemos
+estou
+estÃ¡
+estÃ¡s
+estÃ¡vamos
+estÃ£o
+eu
+exemplo
+falta
+farÃ¡
+favor
+faz
+fazeis
+fazem
+fazemos
+fazer
+fazes
+fazia
+faÃ§o
+fez
+fim
+final
+foi
+fomos
+for
+fora
+foram
+forem
+forma
+formos
+fosse
+fossem
+foste
+fostes
+fui
+fÃ´ramos
+fÃ´ssemos
+geral
+grande
+grandes
+grupo
+ha
+haja
+hajam
+hajamos
+havemos
+havia
+hei
+hoje
+hora
+horas
+houve
+houvemos
+houver
+houvera
+houveram
+houverei
+houverem
+houveremos
+houveria
+houveriam
+houvermos
+houverÃ¡
+houverÃ£o
+houverÃ­amos
+houvesse
+houvessem
+houvÃ©ramos
+houvÃ©ssemos
+hÃ¡
+hÃ£o
+iniciar
+inicio
+ir
+irÃ¡
+isso
+ista
+iste
+isto
+jÃ¡
+lado
+lhe
+lhes
+ligado
+local
+logo
+longe
+lugar
+lÃ¡
+maior
+maioria
+maiorias
+mais
+mal
+mas
+me
+mediante
+meio
+menor
+menos
+meses
+mesma
+mesmas
+mesmo
+mesmos
+meu
+meus
+mil
+minha
+minhas
+momento
+muito
+muitos
+mÃ¡ximo
+mÃªs
+na
+nada
+nao
+naquela
+naquelas
+naquele
+naqueles
+nas
+nem
+nenhuma
+nessa
+nessas
+nesse
+nesses
+nesta
+nestas
+neste
+nestes
+no
+noite
+nome
+nos
+nossa
+nossas
+nosso
+nossos
+nova
+novas
+nove
+novo
+novos
+num
+numa
+numas
+nunca
+nuns
+nÃ£o
+nÃ­vel
+nÃ³s
+nÃºmero
+o
+obra
+obrigada
+obrigado
+oitava
+oitavo
+oito
+onde
+ontem
+onze
+os
+ou
+outra
+outras
+outro
+outros
+para
+parece
+parte
+partir
+paucas
+pegar
+pela
+pelas
+pelo
+pelos
+perante
+perto
+pessoas
+pode
+podem
+poder
+poderÃ¡
+podia
+pois
+ponto
+pontos
+por
+porque
+porquÃª
+portanto
+posiÃ§Ã£o
+possivelmente
+posso
+possÃ­vel
+pouca
+pouco
+poucos
+povo
+primeira
+primeiras
+primeiro
+primeiros
+promeiro
+propios
+proprio
+prÃ³pria
+prÃ³prias
+prÃ³prio
+prÃ³prios
+prÃ³xima
+prÃ³ximas
+prÃ³ximo
+prÃ³ximos
+puderam
+pÃ´de
+pÃµe
+pÃµem
+quais
+qual
+qualquer
+quando
+quanto
+quarta
+quarto
+quatro
+que
+quem
+quer
+quereis
+querem
+queremas
+queres
+quero
+questÃ£o
+quieto
+quinta
+quinto
+quinze
+quÃ¡is
+quÃª
+relaÃ§Ã£o
+sabe
+sabem
+saber
+se
+segunda
+segundo
+sei
+seis
+seja
+sejam
+sejamos
+sem
+sempre
+sendo
+ser
+serei
+seremos
+seria
+seriam
+serÃ¡
+serÃ£o
+serÃ­amos
+sete
+seu
+seus
+sexta
+sexto
+sim
+sistema
+sob
+sobre
+sois
+somente
+somos
+sou
+sua
+suas
+sÃ£o
+sÃ©tima
+sÃ©timo
+sÃ³
+tal
+talvez
+tambem
+tambÃ©m
+tanta
+tantas
+tanto
+tarde
+te
+tem
+temos
+tempo
+tendes
+tenha
+tenham
+tenhamos
+tenho
+tens
+tentar
+tentaram
+tente
+tentei
+ter
+terceira
+terceiro
+terei
+teremos
+teria
+teriam
+terÃ¡
+terÃ£o
+terÃ­amos
+teu
+teus
+teve
+tinha
+tinham
+tipo
+tive
+tivemos
+tiver
+tivera
+tiveram
+tiverem
+tivermos
+tivesse
+tivessem
+tiveste
+tivestes
+tivÃ©ramos
+tivÃ©ssemos
+toda
+todas
+todo
+todos
+trabalhar
+trabalho
+treze
+trÃªs
+tu
+tua
+tuas
+tudo
+tÃ£o
+tÃ©m
+tÃªm
+tÃ­nhamos
+um
+uma
+umas
+uns
+usa
+usar
+vai
+vais
+valor
+veja
+vem
+vens
+ver
+verdade
+verdadeiro
+vez
+vezes
+viagem
+vindo
+vinte
+vocÃª
+vocÃªs
+vos
+vossa
+vossas
+vosso
+vossos
+vÃ¡rios
+vÃ£o
+vÃªm
+vÃ³s
+zero
+Ã 
+Ã s
+Ã¡rea
+Ã©
+Ã©ramos
+Ã©s
+Ãºltimo
\ No newline at end of file
diff --git a/static/stopwords/ro b/static/stopwords/ro
new file mode 100644
index 0000000..6b7dbfd
--- /dev/null
+++ b/static/stopwords/ro
@@ -0,0 +1,434 @@
+a
+abia
+acea
+aceasta
+aceastÄ
+aceea
+aceeasi
+acei
+aceia
+acel
+acela
+acelasi
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+acestei
+acestia
+acestui
+aceÅti
+aceÅtia
+acolo
+acord
+acum
+adica
+ai
+aia
+aibÄ
+aici
+aiurea
+al
+ala
+alaturi
+ale
+alea
+alt
+alta
+altceva
+altcineva
+alte
+altfel
+alti
+altii
+altul
+am
+anume
+apoi
+ar
+are
+as
+asa
+asemenea
+asta
+astazi
+astea
+astfel
+astÄzi
+asupra
+atare
+atat
+atata
+atatea
+atatia
+ati
+atit
+atita
+atitea
+atitia
+atunci
+au
+avea
+avem
+aveÅ£i
+avut
+azi
+aÅ
+aÅadar
+aÅ£i
+b
+ba
+bine
+bucur
+bunÄ
+c
+ca
+cam
+cand
+capat
+care
+careia
+carora
+caruia
+cat
+catre
+caut
+ce
+cea
+ceea
+cei
+ceilalti
+cel
+cele
+celor
+ceva
+chiar
+ci
+cinci
+cind
+cine
+cineva
+cit
+cita
+cite
+citeva
+citi
+citiva
+conform
+contra
+cu
+cui
+cum
+cumva
+curÃ¢nd
+curÃ®nd
+cÃ¢nd
+cÃ¢t
+cÃ¢te
+cÃ¢tva
+cÃ¢Å£i
+cÃ®nd
+cÃ®t
+cÃ®te
+cÃ®tva
+cÃ®Å£i
+cÄ
+cÄci
+cÄrei
+cÄror
+cÄrui
+cÄtre
+d
+da
+daca
+dacÄ
+dar
+dat
+datoritÄ
+datÄ
+dau
+de
+deasupra
+deci
+decit
+degraba
+deja
+deoarece
+departe
+desi
+despre
+deÅi
+din
+dinaintea
+dintr
+dintr-
+dintre
+doar
+doi
+doilea
+douÄ
+drept
+dupa
+dupÄ
+dÄ
+e
+ea
+ei
+el
+ele
+era
+eram
+este
+eu
+exact
+eÅti
+f
+face
+fara
+fata
+fel
+fi
+fie
+fiecare
+fii
+fim
+fiu
+fiÅ£i
+foarte
+fost
+frumos
+fÄrÄ
+g
+geaba
+graÅ£ie
+h
+halbÄ
+i
+ia
+iar
+ieri
+ii
+il
+imi
+in
+inainte
+inapoi
+inca
+incit
+insa
+intr
+intre
+isi
+iti
+j
+k
+l
+la
+le
+li
+lor
+lui
+lÃ¢ngÄ
+lÃ®ngÄ
+m
+ma
+mai
+mare
+mea
+mei
+mele
+mereu
+meu
+mi
+mie
+mine
+mod
+mult
+multa
+multe
+multi
+multÄ
+mulÅ£i
+mulÅ£umesc
+mÃ¢ine
+mÃ®ine
+mÄ
+n
+ne
+nevoie
+ni
+nici
+niciodata
+nicÄieri
+nimeni
+nimeri
+nimic
+niste
+niÅte
+noastre
+noastrÄ
+noi
+noroc
+nostri
+nostru
+nou
+noua
+nouÄ
+noÅtri
+nu
+numai
+o
+opt
+or
+ori
+oricare
+orice
+oricine
+oricum
+oricÃ¢nd
+oricÃ¢t
+oricÃ®nd
+oricÃ®t
+oriunde
+p
+pai
+parca
+patra
+patru
+patrulea
+pe
+pentru
+peste
+pic
+pina
+plus
+poate
+pot
+prea
+prima
+primul
+prin
+printr-
+putini
+puÅ£in
+puÅ£ina
+puÅ£inÄ
+pÃ¢nÄ
+pÃ®nÄ
+r
+rog
+s
+sa
+sa-mi
+sa-ti
+sai
+sale
+sau
+se
+si
+sint
+sintem
+spate
+spre
+sub
+sunt
+suntem
+sunteÅ£i
+sus
+sutÄ
+sÃ®nt
+sÃ®ntem
+sÃ®nteÅ£i
+sÄ
+sÄi
+sÄu
+t
+ta
+tale
+te
+ti
+timp
+tine
+toata
+toate
+toatÄ
+tocmai
+tot
+toti
+totul
+totusi
+totuÅi
+toÅ£i
+trei
+treia
+treilea
+tu
+tuturor
+tÄi
+tÄu
+u
+ul
+ului
+un
+una
+unde
+undeva
+unei
+uneia
+unele
+uneori
+unii
+unor
+unora
+unu
+unui
+unuia
+unul
+v
+va
+vi
+voastre
+voastrÄ
+voi
+vom
+vor
+vostru
+vouÄ
+voÅtri
+vreme
+vreo
+vreun
+vÄ
+x
+z
+zece
+zero
+zi
+zice
+Ã®i
+Ã®l
+Ã®mi
+Ã®mpotriva
+Ã®n
+Ã®nainte
+Ã®naintea
+Ã®ncotro
+Ã®ncÃ¢t
+Ã®ncÃ®t
+Ã®ntre
+Ã®ntrucÃ¢t
+Ã®ntrucÃ®t
+Ã®Å£i
+Äla
+Älea
+Ästa
+Ästea
+ÄÅtia
+Åapte
+Åase
+Åi
+Åtiu
+Å£i
+Å£ie
\ No newline at end of file
diff --git a/static/stopwords/ru b/static/stopwords/ru
new file mode 100644
index 0000000..5db5ef1
--- /dev/null
+++ b/static/stopwords/ru
@@ -0,0 +1,559 @@
+c
+Ð°
+Ð°Ð»Ð»Ð¾
+Ð±ÐµÐ·
+Ð±ÐµÐ»ÑÐ¹
+Ð±Ð»Ð¸Ð·ÐºÐ¾
+Ð±Ð¾Ð»ÐµÐµ
+Ð±Ð¾Ð»ÑÑÐµ
+Ð±Ð¾Ð»ÑÑÐ¾Ð¹
+Ð±ÑÐ´ÐµÐ¼
+Ð±ÑÐ´ÐµÑ
+Ð±ÑÐ´ÐµÑÐµ
+Ð±ÑÐ´ÐµÑÑ
+Ð±ÑÐ´ÑÐ¾
+Ð±ÑÐ´Ñ
+Ð±ÑÐ´ÑÑ
+Ð±ÑÐ´Ñ
+Ð±Ñ
+Ð±ÑÐ²Ð°ÐµÑ
+Ð±ÑÐ²Ñ
+Ð±ÑÐ»
+Ð±ÑÐ»Ð°
+Ð±ÑÐ»Ð¸
+Ð±ÑÐ»Ð¾
+Ð±ÑÑÑ
+Ð²
+Ð²Ð°Ð¶Ð½Ð°Ñ
+Ð²Ð°Ð¶Ð½Ð¾Ðµ
+Ð²Ð°Ð¶Ð½ÑÐµ
+Ð²Ð°Ð¶Ð½ÑÐ¹
+Ð²Ð°Ð¼
+Ð²Ð°Ð¼Ð¸
+Ð²Ð°Ñ
+Ð²Ð°Ñ
+Ð²Ð°ÑÐ°
+Ð²Ð°ÑÐµ
+Ð²Ð°ÑÐ¸
+Ð²Ð²ÐµÑÑ
+Ð²Ð´Ð°Ð»Ð¸
+Ð²Ð´ÑÑÐ³
+Ð²ÐµÐ´Ñ
+Ð²ÐµÐ·Ð´Ðµ
+Ð²ÐµÑÐ½ÑÑÑÑÑ
+Ð²ÐµÑÑ
+Ð²ÐµÑÐµÑ
+Ð²Ð·Ð³Ð»ÑÐ´
+Ð²Ð·ÑÑÑ
+Ð²Ð¸Ð´
+Ð²Ð¸Ð´ÐµÐ»
+Ð²Ð¸Ð´ÐµÑÑ
+Ð²Ð¼ÐµÑÑÐµ
+Ð²Ð½Ðµ
+Ð²Ð½Ð¸Ð·
+Ð²Ð½Ð¸Ð·Ñ
+Ð²Ð¾
+Ð²Ð¾Ð´Ð°
+Ð²Ð¾Ð¹Ð½Ð°
+Ð²Ð¾ÐºÑÑÐ³
+Ð²Ð¾Ð½
+Ð²Ð¾Ð¾Ð±ÑÐµ
+Ð²Ð¾Ð¿ÑÐ¾Ñ
+Ð²Ð¾ÑÐµÐ¼Ð½Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð²Ð¾ÑÐµÐ¼Ð½Ð°Ð´ÑÐ°ÑÑ
+Ð²Ð¾ÑÐµÐ¼Ñ
+Ð²Ð¾ÑÑÐ¼Ð¾Ð¹
+Ð²Ð¾Ñ
+Ð²Ð¿ÑÐ¾ÑÐµÐ¼
+Ð²ÑÐµÐ¼ÐµÐ½Ð¸
+Ð²ÑÐµÐ¼Ñ
+Ð²ÑÐµ
+Ð²ÑÐµ ÐµÑÐµ
+Ð²ÑÐµÐ³Ð´Ð°
+Ð²ÑÐµÐ³Ð¾
+Ð²ÑÐµÐ¼
+Ð²ÑÐµÐ¼Ð¸
+Ð²ÑÐµÐ¼Ñ
+Ð²ÑÐµÑ
+Ð²ÑÐµÑ
+Ð²ÑÑ
+Ð²ÑÑÐ´Ñ
+Ð²ÑÑ
+Ð²ÑÑ
+Ð²ÑÐ¾ÑÐ¾Ð¹
+Ð²Ñ
+Ð²ÑÐ¹ÑÐ¸
+Ð³
+Ð³Ð´Ðµ
+Ð³Ð»Ð°Ð²Ð½ÑÐ¹
+Ð³Ð»Ð°Ð·
+Ð³Ð¾Ð²Ð¾ÑÐ¸Ð»
+Ð³Ð¾Ð²Ð¾ÑÐ¸Ñ
+Ð³Ð¾Ð²Ð¾ÑÐ¸ÑÑ
+Ð³Ð¾Ð´
+Ð³Ð¾Ð´Ð°
+Ð³Ð¾Ð´Ñ
+Ð³Ð¾Ð»Ð¾Ð²Ð°
+Ð³Ð¾Ð»Ð¾Ñ
+Ð³Ð¾ÑÐ¾Ð´
+Ð´Ð°
+Ð´Ð°Ð²Ð°ÑÑ
+Ð´Ð°Ð²Ð½Ð¾
+Ð´Ð°Ð¶Ðµ
+Ð´Ð°Ð»ÐµÐºÐ¸Ð¹
+Ð´Ð°Ð»ÐµÐºÐ¾
+Ð´Ð°Ð»ÑÑÐµ
+Ð´Ð°ÑÐ¾Ð¼
+Ð´Ð°ÑÑ
+Ð´Ð²Ð°
+Ð´Ð²Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð´Ð²Ð°Ð´ÑÐ°ÑÑ
+Ð´Ð²Ðµ
+Ð´Ð²ÐµÐ½Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð´Ð²ÐµÐ½Ð°Ð´ÑÐ°ÑÑ
+Ð´Ð²ÐµÑÑ
+Ð´Ð²ÑÑ
+Ð´ÐµÐ²ÑÑÐ½Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð´ÐµÐ²ÑÑÐ½Ð°Ð´ÑÐ°ÑÑ
+Ð´ÐµÐ²ÑÑÑÐ¹
+Ð´ÐµÐ²ÑÑÑ
+Ð´ÐµÐ¹ÑÑÐ²Ð¸ÑÐµÐ»ÑÐ½Ð¾
+Ð´ÐµÐ»
+Ð´ÐµÐ»Ð°Ð»
+Ð´ÐµÐ»Ð°ÑÑ
+Ð´ÐµÐ»Ð°Ñ
+Ð´ÐµÐ»Ð¾
+Ð´ÐµÐ½Ñ
+Ð´ÐµÐ½ÑÐ³Ð¸
+Ð´ÐµÑÑÑÑÐ¹
+Ð´ÐµÑÑÑÑ
+Ð´Ð»Ñ
+Ð´Ð¾
+Ð´Ð¾Ð²Ð¾Ð»ÑÐ½Ð¾
+Ð´Ð¾Ð»Ð³Ð¾
+Ð´Ð¾Ð»Ð¶ÐµÐ½
+Ð´Ð¾Ð»Ð¶Ð½Ð¾
+Ð´Ð¾Ð»Ð¶Ð½ÑÐ¹
+Ð´Ð¾Ð¼
+Ð´Ð¾ÑÐ¾Ð³Ð°
+Ð´ÑÑÐ³
+Ð´ÑÑÐ³Ð°Ñ
+Ð´ÑÑÐ³Ð¸Ðµ
+Ð´ÑÑÐ³Ð¸Ñ
+Ð´ÑÑÐ³Ð¾
+Ð´ÑÑÐ³Ð¾Ðµ
+Ð´ÑÑÐ³Ð¾Ð¹
+Ð´ÑÐ¼Ð°ÑÑ
+Ð´ÑÑÐ°
+Ðµ
+ÐµÐ³Ð¾
+ÐµÐµ
+ÐµÐ¹
+ÐµÐ¼Ñ
+ÐµÑÐ»Ð¸
+ÐµÑÑÑ
+ÐµÑÐµ
+ÐµÑÑ
+ÐµÑ
+ÐµÑ
+Ð¶
+Ð¶Ð´Ð°ÑÑ
+Ð¶Ðµ
+Ð¶ÐµÐ½Ð°
+Ð¶ÐµÐ½ÑÐ¸Ð½Ð°
+Ð¶Ð¸Ð·Ð½Ñ
+Ð¶Ð¸ÑÑ
+Ð·Ð°
+Ð·Ð°Ð½ÑÑ
+Ð·Ð°Ð½ÑÑÐ°
+Ð·Ð°Ð½ÑÑÐ¾
+Ð·Ð°Ð½ÑÑÑ
+Ð·Ð°ÑÐµÐ¼
+Ð·Ð°ÑÐ¾
+Ð·Ð°ÑÐµÐ¼
+Ð·Ð´ÐµÑÑ
+Ð·ÐµÐ¼Ð»Ñ
+Ð·Ð½Ð°ÑÑ
+Ð·Ð½Ð°ÑÐ¸Ñ
+Ð·Ð½Ð°ÑÐ¸ÑÑ
+Ð¸
+Ð¸Ð´Ð¸
+Ð¸Ð´ÑÐ¸
+Ð¸Ð·
+Ð¸Ð»Ð¸
+Ð¸Ð¼
+Ð¸Ð¼ÐµÐµÑ
+Ð¸Ð¼ÐµÐ»
+Ð¸Ð¼ÐµÐ½Ð½Ð¾
+Ð¸Ð¼ÐµÑÑ
+Ð¸Ð¼Ð¸
+Ð¸Ð¼Ñ
+Ð¸Ð½Ð¾Ð³Ð´Ð°
+Ð¸Ñ
+Ðº
+ÐºÐ°Ð¶Ð´Ð°Ñ
+ÐºÐ°Ð¶Ð´Ð¾Ðµ
+ÐºÐ°Ð¶Ð´ÑÐµ
+ÐºÐ°Ð¶Ð´ÑÐ¹
+ÐºÐ°Ð¶ÐµÑÑÑ
+ÐºÐ°Ð·Ð°ÑÑÑÑ
+ÐºÐ°Ðº
+ÐºÐ°ÐºÐ°Ñ
+ÐºÐ°ÐºÐ¾Ð¹
+ÐºÐµÐ¼
+ÐºÐ½Ð¸Ð³Ð°
+ÐºÐ¾Ð³Ð´Ð°
+ÐºÐ¾Ð³Ð¾
+ÐºÐ¾Ð¼
+ÐºÐ¾Ð¼Ð½Ð°ÑÐ°
+ÐºÐ¾Ð¼Ñ
+ÐºÐ¾Ð½ÐµÑ
+ÐºÐ¾Ð½ÐµÑÐ½Ð¾
+ÐºÐ¾ÑÐ¾ÑÐ°Ñ
+ÐºÐ¾ÑÐ¾ÑÐ¾Ð³Ð¾
+ÐºÐ¾ÑÐ¾ÑÐ¾Ð¹
+ÐºÐ¾ÑÐ¾ÑÑÐµ
+ÐºÐ¾ÑÐ¾ÑÑÐ¹
+ÐºÐ¾ÑÐ¾ÑÑÑ
+ÐºÑÐ¾Ð¼Ðµ
+ÐºÑÑÐ³Ð¾Ð¼
+ÐºÑÐ¾
+ÐºÑÐ´Ð°
+Ð»ÐµÐ¶Ð°ÑÑ
+Ð»ÐµÑ
+Ð»Ð¸
+Ð»Ð¸ÑÐ¾
+Ð»Ð¸ÑÑ
+Ð»ÑÑÑÐµ
+Ð»ÑÐ±Ð¸ÑÑ
+Ð»ÑÐ´Ð¸
+Ð¼
+Ð¼Ð°Ð»ÐµÐ½ÑÐºÐ¸Ð¹
+Ð¼Ð°Ð»Ð¾
+Ð¼Ð°ÑÑ
+Ð¼Ð°ÑÐ¸Ð½Ð°
+Ð¼ÐµÐ¶Ð´Ñ
+Ð¼ÐµÐ»Ñ
+Ð¼ÐµÐ½ÐµÐµ
+Ð¼ÐµÐ½ÑÑÐµ
+Ð¼ÐµÐ½Ñ
+Ð¼ÐµÑÑÐ¾
+Ð¼Ð¸Ð»Ð»Ð¸Ð¾Ð½Ð¾Ð²
+Ð¼Ð¸Ð¼Ð¾
+Ð¼Ð¸Ð½ÑÑÐ°
+Ð¼Ð¸Ñ
+Ð¼Ð¸ÑÐ°
+Ð¼Ð½Ðµ
+Ð¼Ð½Ð¾Ð³Ð¾
+Ð¼Ð½Ð¾Ð³Ð¾ÑÐ¸ÑÐ»ÐµÐ½Ð½Ð°Ñ
+Ð¼Ð½Ð¾Ð³Ð¾ÑÐ¸ÑÐ»ÐµÐ½Ð½Ð¾Ðµ
+Ð¼Ð½Ð¾Ð³Ð¾ÑÐ¸ÑÐ»ÐµÐ½Ð½ÑÐµ
+Ð¼Ð½Ð¾Ð³Ð¾ÑÐ¸ÑÐ»ÐµÐ½Ð½ÑÐ¹
+Ð¼Ð½Ð¾Ð¹
+Ð¼Ð½Ð¾Ñ
+Ð¼Ð¾Ð³
+Ð¼Ð¾Ð³Ñ
+Ð¼Ð¾Ð³ÑÑ
+Ð¼Ð¾Ð¶
+Ð¼Ð¾Ð¶ÐµÑ
+Ð¼Ð¾Ð¶ÐµÑ Ð±ÑÑÑ
+Ð¼Ð¾Ð¶Ð½Ð¾
+Ð¼Ð¾Ð¶ÑÐ¾
+Ð¼Ð¾Ð¸
+Ð¼Ð¾Ð¹
+Ð¼Ð¾Ñ
+Ð¼Ð¾ÑÐºÐ²Ð°
+Ð¼Ð¾ÑÑ
+Ð¼Ð¾Ñ
+Ð¼Ð¾Ñ
+Ð¼Ñ
+Ð½Ð°
+Ð½Ð°Ð²ÐµÑÑÑ
+Ð½Ð°Ð´
+Ð½Ð°Ð´Ð¾
+Ð½Ð°Ð·Ð°Ð´
+Ð½Ð°Ð¸Ð±Ð¾Ð»ÐµÐµ
+Ð½Ð°Ð¹ÑÐ¸
+Ð½Ð°ÐºÐ¾Ð½ÐµÑ
+Ð½Ð°Ð¼
+Ð½Ð°Ð¼Ð¸
+Ð½Ð°ÑÐ¾Ð´
+Ð½Ð°Ñ
+Ð½Ð°ÑÐ°Ð»Ð°
+Ð½Ð°ÑÐ°ÑÑ
+Ð½Ð°Ñ
+Ð½Ð°ÑÐ°
+Ð½Ð°ÑÐµ
+Ð½Ð°ÑÐ¸
+Ð½Ðµ
+Ð½ÐµÐ³Ð¾
+Ð½ÐµÐ´Ð°Ð²Ð½Ð¾
+Ð½ÐµÐ´Ð°Ð»ÐµÐºÐ¾
+Ð½ÐµÐµ
+Ð½ÐµÐ¹
+Ð½ÐµÐºÐ¾ÑÐ¾ÑÑÐ¹
+Ð½ÐµÐ»ÑÐ·Ñ
+Ð½ÐµÐ¼
+Ð½ÐµÐ¼Ð½Ð¾Ð³Ð¾
+Ð½ÐµÐ¼Ñ
+Ð½ÐµÐ¿ÑÐµÑÑÐ²Ð½Ð¾
+Ð½ÐµÑÐµÐ´ÐºÐ¾
+Ð½ÐµÑÐºÐ¾Ð»ÑÐºÐ¾
+Ð½ÐµÑ
+Ð½ÐµÑ
+Ð½ÐµÑ
+Ð½Ð¸
+Ð½Ð¸Ð±ÑÐ´Ñ
+Ð½Ð¸Ð¶Ðµ
+Ð½Ð¸Ð·ÐºÐ¾
+Ð½Ð¸ÐºÐ°ÐºÐ¾Ð¹
+Ð½Ð¸ÐºÐ¾Ð³Ð´Ð°
+Ð½Ð¸ÐºÑÐ¾
+Ð½Ð¸ÐºÑÐ´Ð°
+Ð½Ð¸Ð¼
+Ð½Ð¸Ð¼Ð¸
+Ð½Ð¸Ñ
+Ð½Ð¸ÑÐµÐ³Ð¾
+Ð½Ð¸ÑÑÐ¾
+Ð½Ð¾
+Ð½Ð¾Ð²ÑÐ¹
+Ð½Ð¾Ð³Ð°
+Ð½Ð¾ÑÑ
+Ð½Ñ
+Ð½ÑÐ¶Ð½Ð¾
+Ð½ÑÐ¶Ð½ÑÐ¹
+Ð½Ñ
+Ð¾
+Ð¾Ð±
+Ð¾Ð±Ð°
+Ð¾Ð±ÑÑÐ½Ð¾
+Ð¾Ð´Ð¸Ð½
+Ð¾Ð´Ð¸Ð½Ð½Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð¾Ð´Ð¸Ð½Ð½Ð°Ð´ÑÐ°ÑÑ
+Ð¾Ð´Ð½Ð°Ð¶Ð´Ñ
+Ð¾Ð´Ð½Ð°ÐºÐ¾
+Ð¾Ð´Ð½Ð¾Ð³Ð¾
+Ð¾Ð´Ð½Ð¾Ð¹
+Ð¾ÐºÐ°Ð·Ð°ÑÑÑÑ
+Ð¾ÐºÐ½Ð¾
+Ð¾ÐºÐ¾Ð»Ð¾
+Ð¾Ð½
+Ð¾Ð½Ð°
+Ð¾Ð½Ð¸
+Ð¾Ð½Ð¾
+Ð¾Ð¿ÑÑÑ
+Ð¾ÑÐ¾Ð±ÐµÐ½Ð½Ð¾
+Ð¾ÑÑÐ°ÑÑÑÑ
+Ð¾Ñ
+Ð¾ÑÐ²ÐµÑÐ¸ÑÑ
+Ð¾ÑÐµÑ
+Ð¾ÑÐºÑÐ´Ð°
+Ð¾ÑÐ¾Ð²ÑÑÐ´Ñ
+Ð¾ÑÑÑÐ´Ð°
+Ð¾ÑÐµÐ½Ñ
+Ð¿ÐµÑÐ²ÑÐ¹
+Ð¿ÐµÑÐµÐ´
+Ð¿Ð¸ÑÐ°ÑÑ
+Ð¿Ð»ÐµÑÐ¾
+Ð¿Ð¾
+Ð¿Ð¾Ð´
+Ð¿Ð¾Ð´Ð¾Ð¹Ð´Ð¸
+Ð¿Ð¾Ð´ÑÐ¼Ð°ÑÑ
+Ð¿Ð¾Ð¶Ð°Ð»ÑÐ¹ÑÑÐ°
+Ð¿Ð¾Ð·Ð¶Ðµ
+Ð¿Ð¾Ð¹ÑÐ¸
+Ð¿Ð¾ÐºÐ°
+Ð¿Ð¾Ð»
+Ð¿Ð¾Ð»ÑÑÐ¸ÑÑ
+Ð¿Ð¾Ð¼Ð½Ð¸ÑÑ
+Ð¿Ð¾Ð½Ð¸Ð¼Ð°ÑÑ
+Ð¿Ð¾Ð½ÑÑÑ
+Ð¿Ð¾Ñ
+Ð¿Ð¾ÑÐ°
+Ð¿Ð¾ÑÐ»Ðµ
+Ð¿Ð¾ÑÐ»ÐµÐ´Ð½Ð¸Ð¹
+Ð¿Ð¾ÑÐ¼Ð¾ÑÑÐµÑÑ
+Ð¿Ð¾ÑÑÐµÐ´Ð¸
+Ð¿Ð¾ÑÐ¾Ð¼
+Ð¿Ð¾ÑÐ¾Ð¼Ñ
+Ð¿Ð¾ÑÐµÐ¼Ñ
+Ð¿Ð¾ÑÑÐ¸
+Ð¿ÑÐ°Ð²Ð´Ð°
+Ð¿ÑÐµÐºÑÐ°ÑÐ½Ð¾
+Ð¿ÑÐ¸
+Ð¿ÑÐ¾
+Ð¿ÑÐ¾ÑÑÐ¾
+Ð¿ÑÐ¾ÑÐ¸Ð²
+Ð¿ÑÐ¾ÑÐµÐ½ÑÐ¾Ð²
+Ð¿ÑÑÑ
+Ð¿ÑÑÐ½Ð°Ð´ÑÐ°ÑÑÐ¹
+Ð¿ÑÑÐ½Ð°Ð´ÑÐ°ÑÑ
+Ð¿ÑÑÑÐ¹
+Ð¿ÑÑÑ
+ÑÐ°Ð±Ð¾ÑÐ°
+ÑÐ°Ð±Ð¾ÑÐ°ÑÑ
+ÑÐ°Ð·
+ÑÐ°Ð·Ð²Ðµ
+ÑÐ°Ð½Ð¾
+ÑÐ°Ð½ÑÑÐµ
+ÑÐµÐ±ÐµÐ½Ð¾Ðº
+ÑÐµÑÐ¸ÑÑ
+ÑÐ¾ÑÑÐ¸Ñ
+ÑÑÐºÐ°
+ÑÑÑÑÐºÐ¸Ð¹
+ÑÑÐ´
+ÑÑÐ´Ð¾Ð¼
+Ñ
+Ñ ÐºÐµÐ¼
+ÑÐ°Ð¼
+ÑÐ°Ð¼Ð°
+ÑÐ°Ð¼Ð¸
+ÑÐ°Ð¼Ð¸Ð¼
+ÑÐ°Ð¼Ð¸Ð¼Ð¸
+ÑÐ°Ð¼Ð¸Ñ
+ÑÐ°Ð¼Ð¾
+ÑÐ°Ð¼Ð¾Ð³Ð¾
+ÑÐ°Ð¼Ð¾Ð¹
+ÑÐ°Ð¼Ð¾Ð¼
+ÑÐ°Ð¼Ð¾Ð¼Ñ
+ÑÐ°Ð¼Ñ
+ÑÐ°Ð¼ÑÐ¹
+ÑÐ²ÐµÑ
+ÑÐ²Ð¾Ðµ
+ÑÐ²Ð¾ÐµÐ³Ð¾
+ÑÐ²Ð¾ÐµÐ¹
+ÑÐ²Ð¾Ð¸
+ÑÐ²Ð¾Ð¸Ñ
+ÑÐ²Ð¾Ð¹
+ÑÐ²Ð¾Ñ
+ÑÐ´ÐµÐ»Ð°ÑÑ
+ÑÐµÐ°Ð¾Ð¹
+ÑÐµÐ±Ðµ
+ÑÐµÐ±Ñ
+ÑÐµÐ³Ð¾Ð´Ð½Ñ
+ÑÐµÐ´ÑÐ¼Ð¾Ð¹
+ÑÐµÐ¹ÑÐ°Ñ
+ÑÐµÐ¼Ð½Ð°Ð´ÑÐ°ÑÑÐ¹
+ÑÐµÐ¼Ð½Ð°Ð´ÑÐ°ÑÑ
+ÑÐµÐ¼Ñ
+ÑÐ¸Ð´ÐµÑÑ
+ÑÐ¸Ð»Ð°
+ÑÐ¸Ñ
+ÑÐºÐ°Ð·Ð°Ð»
+ÑÐºÐ°Ð·Ð°Ð»Ð°
+ÑÐºÐ°Ð·Ð°ÑÑ
+ÑÐºÐ¾Ð»ÑÐºÐ¾
+ÑÐ»Ð¸ÑÐºÐ¾Ð¼
+ÑÐ»Ð¾Ð²Ð¾
+ÑÐ»ÑÑÐ°Ð¹
+ÑÐ¼Ð¾ÑÑÐµÑÑ
+ÑÐ½Ð°ÑÐ°Ð»Ð°
+ÑÐ½Ð¾Ð²Ð°
+ÑÐ¾
+ÑÐ¾Ð±Ð¾Ð¹
+ÑÐ¾Ð±Ð¾Ñ
+ÑÐ¾Ð²ÐµÑÑÐºÐ¸Ð¹
+ÑÐ¾Ð²ÑÐµÐ¼
+ÑÐ¿Ð°ÑÐ¸Ð±Ð¾
+ÑÐ¿ÑÐ¾ÑÐ¸ÑÑ
+ÑÑÐ°Ð·Ñ
+ÑÑÐ°Ð»
+ÑÑÐ°ÑÑÐ¹
+ÑÑÐ°ÑÑ
+ÑÑÐ¾Ð»
+ÑÑÐ¾ÑÐ¾Ð½Ð°
+ÑÑÐ¾ÑÑÑ
+ÑÑÑÐ°Ð½Ð°
+ÑÑÑÑ
+ÑÑÐ¸ÑÐ°ÑÑ
+Ñ
+ÑÐ°
+ÑÐ°Ðº
+ÑÐ°ÐºÐ°Ñ
+ÑÐ°ÐºÐ¶Ðµ
+ÑÐ°ÐºÐ¸
+ÑÐ°ÐºÐ¸Ðµ
+ÑÐ°ÐºÐ¾Ðµ
+ÑÐ°ÐºÐ¾Ð¹
+ÑÐ°Ð¼
+ÑÐ²Ð¾Ð¸
+ÑÐ²Ð¾Ð¹
+ÑÐ²Ð¾Ñ
+ÑÐ²Ð¾Ñ
+ÑÐµ
+ÑÐµÐ±Ðµ
+ÑÐµÐ±Ñ
+ÑÐµÐ¼
+ÑÐµÐ¼Ð¸
+ÑÐµÐ¿ÐµÑÑ
+ÑÐµÑ
+ÑÐ¾
+ÑÐ¾Ð±Ð¾Ð¹
+ÑÐ¾Ð±Ð¾Ñ
+ÑÐ¾Ð²Ð°ÑÐ¸Ñ
+ÑÐ¾Ð³Ð´Ð°
+ÑÐ¾Ð³Ð¾
+ÑÐ¾Ð¶Ðµ
+ÑÐ¾Ð»ÑÐºÐ¾
+ÑÐ¾Ð¼
+ÑÐ¾Ð¼Ñ
+ÑÐ¾Ñ
+ÑÐ¾Ñ
+ÑÑÐµÑÐ¸Ð¹
+ÑÑÐ¸
+ÑÑÐ¸Ð½Ð°Ð´ÑÐ°ÑÑÐ¹
+ÑÑÐ¸Ð½Ð°Ð´ÑÐ°ÑÑ
+ÑÑ
+ÑÑÐ´Ð°
+ÑÑÑ
+ÑÑ
+ÑÑÑÑÑ
+Ñ
+ÑÐ²Ð¸Ð´ÐµÑÑ
+ÑÐ¶
+ÑÐ¶Ðµ
+ÑÐ»Ð¸ÑÐ°
+ÑÐ¼ÐµÑÑ
+ÑÑÑÐ¾
+ÑÐ¾ÑÐ¾ÑÐ¸Ð¹
+ÑÐ¾ÑÐ¾ÑÐ¾
+ÑÐ¾ÑÐµÐ» Ð±Ñ
+ÑÐ¾ÑÐµÑÑ
+ÑÐ¾ÑÑ
+ÑÐ¾ÑÑ
+ÑÐ¾ÑÐµÑÑ
+ÑÐ°Ñ
+ÑÐ°ÑÑÐ¾
+ÑÐ°ÑÑÑ
+ÑÐ°ÑÐµ
+ÑÐµÐ³Ð¾
+ÑÐµÐ»Ð¾Ð²ÐµÐº
+ÑÐµÐ¼
+ÑÐµÐ¼Ñ
+ÑÐµÑÐµÐ·
+ÑÐµÑÐ²ÐµÑÑÑÐ¹
+ÑÐµÑÑÑÐµ
+ÑÐµÑÑÑÐ½Ð°Ð´ÑÐ°ÑÑÐ¹
+ÑÐµÑÑÑÐ½Ð°Ð´ÑÐ°ÑÑ
+ÑÑÐ¾
+ÑÑÐ¾Ð±
+ÑÑÐ¾Ð±Ñ
+ÑÑÑÑ
+ÑÐµÑÑÐ½Ð°Ð´ÑÐ°ÑÑÐ¹
+ÑÐµÑÑÐ½Ð°Ð´ÑÐ°ÑÑ
+ÑÐµÑÑÐ¾Ð¹
+ÑÐµÑÑÑ
+ÑÑÐ°
+ÑÑÐ¸
+ÑÑÐ¸Ð¼
+ÑÑÐ¸Ð¼Ð¸
+ÑÑÐ¸Ñ
+ÑÑÐ¾
+ÑÑÐ¾Ð³Ð¾
+ÑÑÐ¾Ð¹
+ÑÑÐ¾Ð¼
+ÑÑÐ¾Ð¼Ñ
+ÑÑÐ¾Ñ
+ÑÑÑ
+Ñ
+ÑÐ²Ð»ÑÑÑÑ
\ No newline at end of file
diff --git a/static/stopwords/sk b/static/stopwords/sk
new file mode 100644
index 0000000..121ffb9
--- /dev/null
+++ b/static/stopwords/sk
@@ -0,0 +1,418 @@
+a
+aby
+aj
+ak
+akej
+akejÅ¾e
+ako
+akom
+akomÅ¾e
+akou
+akouÅ¾e
+akoÅ¾e
+akÃ¡
+akÃ¡Å¾e
+akÃ©
+akÃ©ho
+akÃ©hoÅ¾e
+akÃ©mu
+akÃ©muÅ¾e
+akÃ©Å¾e
+akÃº
+akÃºÅ¾e
+akÃ½
+akÃ½ch
+akÃ½chÅ¾e
+akÃ½m
+akÃ½mi
+akÃ½miÅ¾e
+akÃ½mÅ¾e
+akÃ½Å¾e
+ale
+alebo
+ani
+asi
+avÅ¡ak
+aÅ¾
+ba
+bez
+bezo
+bol
+bola
+boli
+bolo
+bude
+budem
+budeme
+budete
+budeÅ¡
+budÃº
+buÄ
+by
+byÅ¥
+cez
+cezo
+dnes
+do
+eÅ¡te
+ho
+hoci
+i
+iba
+ich
+im
+inej
+inom
+inÃ¡
+inÃ©
+inÃ©ho
+inÃ©mu
+inÃ­
+inÃº
+inÃ½
+inÃ½ch
+inÃ½m
+inÃ½mi
+ja
+je
+jeho
+jej
+jemu
+ju
+k
+kam
+kamÅ¾e
+kaÅ¾dou
+kaÅ¾dÃ¡
+kaÅ¾dÃ©
+kaÅ¾dÃ©ho
+kaÅ¾dÃ©mu
+kaÅ¾dÃ­
+kaÅ¾dÃº
+kaÅ¾dÃ½
+kaÅ¾dÃ½ch
+kaÅ¾dÃ½m
+kaÅ¾dÃ½mi
+kde
+kej
+kejÅ¾e
+keÄ
+keÄÅ¾e
+kie
+kieho
+kiehoÅ¾e
+kiemu
+kiemuÅ¾e
+kieÅ¾e
+koho
+kom
+komu
+kou
+kouÅ¾e
+kto
+ktorej
+ktorou
+ktorÃ¡
+ktorÃ©
+ktorÃ­
+ktorÃº
+ktorÃ½
+ktorÃ½ch
+ktorÃ½m
+ktorÃ½mi
+ku
+kÃ¡
+kÃ¡Å¾e
+kÃ©
+kÃ©Å¾e
+kÃº
+kÃºÅ¾e
+kÃ½
+kÃ½ho
+kÃ½hoÅ¾e
+kÃ½m
+kÃ½mu
+kÃ½muÅ¾e
+kÃ½Å¾e
+lebo
+leda
+ledaÅ¾e
+len
+ma
+majÃº
+mal
+mala
+mali
+maÅ¥
+medzi
+mi
+mne
+mnou
+moja
+moje
+mojej
+mojich
+mojim
+mojimi
+mojou
+moju
+moÅ¾no
+mu
+musia
+musieÅ¥
+musÃ­
+musÃ­m
+musÃ­me
+musÃ­te
+musÃ­Å¡
+my
+mÃ¡
+mÃ¡m
+mÃ¡me
+mÃ¡te
+mÃ¡Å¡
+mÃ´cÅ¥
+mÃ´j
+mÃ´jho
+mÃ´Å¾e
+mÃ´Å¾em
+mÃ´Å¾eme
+mÃ´Å¾ete
+mÃ´Å¾eÅ¡
+mÃ´Å¾u
+mÅa
+na
+nad
+nado
+najmÃ¤
+nami
+naÅ¡a
+naÅ¡e
+naÅ¡ej
+naÅ¡i
+naÅ¡ich
+naÅ¡im
+naÅ¡imi
+naÅ¡ou
+ne
+nech
+neho
+nej
+nejakej
+nejakom
+nejakou
+nejakÃ¡
+nejakÃ©
+nejakÃ©ho
+nejakÃ©mu
+nejakÃº
+nejakÃ½
+nejakÃ½ch
+nejakÃ½m
+nejakÃ½mi
+nemu
+neÅ¾
+nich
+nie
+niektorej
+niektorom
+niektorou
+niektorÃ¡
+niektorÃ©
+niektorÃ©ho
+niektorÃ©mu
+niektorÃº
+niektorÃ½
+niektorÃ½ch
+niektorÃ½m
+niektorÃ½mi
+nielen
+nieÄo
+nim
+nimi
+niÄ
+niÄoho
+niÄom
+niÄomu
+niÄÃ­m
+no
+nÃ¡m
+nÃ¡s
+nÃ¡Å¡
+nÃ¡Å¡ho
+nÃ­m
+o
+od
+odo
+on
+ona
+oni
+ono
+ony
+oÅ
+oÅho
+po
+pod
+podo
+podÄ¾a
+pokiaÄ¾
+popod
+popri
+potom
+poza
+pre
+pred
+predo
+preto
+pretoÅ¾e
+preÄo
+pri
+prÃ¡ve
+s
+sa
+seba
+sebe
+sebou
+sem
+si
+sme
+so
+som
+ste
+svoj
+svoja
+svoje
+svojho
+svojich
+svojim
+svojimi
+svojou
+svoju
+svojÃ­m
+sÃº
+ta
+tak
+takej
+takejto
+takÃ¡
+takÃ¡to
+takÃ©
+takÃ©ho
+takÃ©hoto
+takÃ©mu
+takÃ©muto
+takÃ©to
+takÃ­
+takÃº
+takÃºto
+takÃ½
+takÃ½to
+takÅ¾e
+tam
+teba
+tebe
+tebou
+teda
+tej
+tejto
+ten
+tento
+ti
+tie
+tieto
+tieÅ¾
+to
+toho
+tohoto
+tohto
+tom
+tomto
+tomu
+tomuto
+toto
+tou
+touto
+tu
+tvoj
+tvoja
+tvoje
+tvojej
+tvojho
+tvoji
+tvojich
+tvojim
+tvojimi
+tvojÃ­m
+ty
+tÃ¡
+tÃ¡to
+tÃ­
+tÃ­to
+tÃº
+tÃºto
+tÃ½ch
+tÃ½m
+tÃ½mi
+tÃ½mto
+u
+uÅ¾
+v
+vami
+vaÅ¡a
+vaÅ¡e
+vaÅ¡ej
+vaÅ¡i
+vaÅ¡ich
+vaÅ¡im
+vaÅ¡Ã­m
+veÄ
+viac
+vo
+vy
+vÃ¡m
+vÃ¡s
+vÃ¡Å¡
+vÃ¡Å¡ho
+vÅ¡ak
+vÅ¡etci
+vÅ¡etka
+vÅ¡etko
+vÅ¡etky
+vÅ¡etok
+z
+za
+zaÄo
+zaÄoÅ¾e
+zo
+Ã¡no
+Äej
+Äi
+Äia
+Äie
+Äieho
+Äiemu
+Äiu
+Äo
+Äoho
+Äom
+Äomu
+Äou
+ÄoÅ¾e
+ÄÃ­
+ÄÃ­m
+ÄÃ­mi
+ÄalÅ¡ia
+ÄalÅ¡ie
+ÄalÅ¡ieho
+ÄalÅ¡iemu
+ÄalÅ¡iu
+ÄalÅ¡om
+ÄalÅ¡ou
+ÄalÅ¡Ã­
+ÄalÅ¡Ã­ch
+ÄalÅ¡Ã­m
+ÄalÅ¡Ã­mi
+Åom
+Åou
+Åu
+Å¾e
\ No newline at end of file
diff --git a/static/stopwords/sl b/static/stopwords/sl
new file mode 100644
index 0000000..7135ed3
--- /dev/null
+++ b/static/stopwords/sl
@@ -0,0 +1,446 @@
+a
+ali
+april
+avgust
+b
+bi
+bil
+bila
+bile
+bili
+bilo
+biti
+blizu
+bo
+bodo
+bojo
+bolj
+bom
+bomo
+boste
+bova
+boÅ¡
+brez
+c
+cel
+cela
+celi
+celo
+d
+da
+daleÄ
+dan
+danes
+datum
+december
+deset
+deseta
+deseti
+deseto
+devet
+deveta
+deveti
+deveto
+do
+dober
+dobra
+dobri
+dobro
+dokler
+dol
+dolg
+dolga
+dolgi
+dovolj
+drug
+druga
+drugi
+drugo
+dva
+dve
+e
+eden
+en
+ena
+ene
+eni
+enkrat
+eno
+etc.
+f
+februar
+g
+g.
+ga
+ga.
+gor
+gospa
+gospod
+h
+halo
+i
+idr.
+ii
+iii
+in
+iv
+ix
+iz
+j
+januar
+jaz
+je
+ji
+jih
+jim
+jo
+julij
+junij
+jutri
+k
+kadarkoli
+kaj
+kajti
+kako
+kakor
+kamor
+kamorkoli
+kar
+karkoli
+katerikoli
+kdaj
+kdo
+kdorkoli
+ker
+ki
+kje
+kjer
+kjerkoli
+ko
+koder
+koderkoli
+koga
+komu
+kot
+kratek
+kratka
+kratke
+kratki
+l
+lahka
+lahke
+lahki
+lahko
+le
+lep
+lepa
+lepe
+lepi
+lepo
+leto
+m
+maj
+majhen
+majhna
+majhni
+malce
+malo
+manj
+marec
+me
+med
+medtem
+mene
+mesec
+mi
+midva
+midve
+mnogo
+moj
+moja
+moje
+mora
+morajo
+moram
+moramo
+morate
+moraÅ¡
+morem
+mu
+n
+na
+nad
+naj
+najina
+najino
+najmanj
+naju
+najveÄ
+nam
+narobe
+nas
+nato
+nazaj
+naÅ¡
+naÅ¡a
+naÅ¡e
+ne
+nedavno
+nedelja
+nek
+neka
+nekaj
+nekatere
+nekateri
+nekatero
+nekdo
+neke
+nekega
+neki
+nekje
+neko
+nekoga
+nekoÄ
+ni
+nikamor
+nikdar
+nikjer
+nikoli
+niÄ
+nje
+njega
+njegov
+njegova
+njegovo
+njej
+njemu
+njen
+njena
+njeno
+nji
+njih
+njihov
+njihova
+njihovo
+njiju
+njim
+njo
+njun
+njuna
+njuno
+no
+nocoj
+november
+npr.
+o
+ob
+oba
+obe
+oboje
+od
+odprt
+odprta
+odprti
+okoli
+oktober
+on
+onadva
+one
+oni
+onidve
+osem
+osma
+osmi
+osmo
+oz.
+p
+pa
+pet
+peta
+petek
+peti
+peto
+po
+pod
+pogosto
+poleg
+poln
+polna
+polni
+polno
+ponavadi
+ponedeljek
+ponovno
+potem
+povsod
+pozdravljen
+pozdravljeni
+prav
+prava
+prave
+pravi
+pravo
+prazen
+prazna
+prazno
+prbl.
+precej
+pred
+prej
+preko
+pri
+pribl.
+pribliÅ¾no
+primer
+pripravljen
+pripravljena
+pripravljeni
+proti
+prva
+prvi
+prvo
+r
+ravno
+redko
+res
+reÄ
+s
+saj
+sam
+sama
+same
+sami
+samo
+se
+sebe
+sebi
+sedaj
+sedem
+sedma
+sedmi
+sedmo
+sem
+september
+seveda
+si
+sicer
+skoraj
+skozi
+slab
+smo
+so
+sobota
+spet
+sreda
+srednja
+srednji
+sta
+ste
+stran
+stvar
+sva
+t
+ta
+tak
+taka
+take
+taki
+tako
+takoj
+tam
+te
+tebe
+tebi
+tega
+teÅ¾ak
+teÅ¾ka
+teÅ¾ki
+teÅ¾ko
+ti
+tista
+tiste
+tisti
+tisto
+tj.
+tja
+to
+toda
+torek
+tretja
+tretje
+tretji
+tri
+tu
+tudi
+tukaj
+tvoj
+tvoja
+tvoje
+u
+v
+vaju
+vam
+vas
+vaÅ¡
+vaÅ¡a
+vaÅ¡e
+ve
+vedno
+velik
+velika
+veliki
+veliko
+vendar
+ves
+veÄ
+vi
+vidva
+vii
+viii
+visok
+visoka
+visoke
+visoki
+vsa
+vsaj
+vsak
+vsaka
+vsakdo
+vsake
+vsaki
+vsakomur
+vse
+vsega
+vsi
+vso
+vÄasih
+vÄeraj
+x
+z
+za
+zadaj
+zadnji
+zakaj
+zaprta
+zaprti
+zaprto
+zdaj
+zelo
+zunaj
+Ä
+Äe
+Äesto
+Äetrta
+Äetrtek
+Äetrti
+Äetrto
+Äez
+Äigav
+Å¡
+Å¡est
+Å¡esta
+Å¡esti
+Å¡esto
+Å¡tiri
+Å¾
+Å¾e
\ No newline at end of file
diff --git a/static/stopwords/so b/static/stopwords/so
new file mode 100644
index 0000000..4153023
--- /dev/null
+++ b/static/stopwords/so
@@ -0,0 +1,30 @@
+aad
+albaabkii
+atabo
+ay
+ayaa
+ayee
+ayuu
+dhan
+hadana
+in
+inuu
+isku
+jiray
+jirtay
+ka
+kale
+kasoo
+ku
+kuu
+lakin
+markii
+oo
+si
+soo
+uga
+ugu
+uu
+waa
+waxa
+waxuu
\ No newline at end of file
diff --git a/static/stopwords/st b/static/stopwords/st
new file mode 100644
index 0000000..92bd21d
--- /dev/null
+++ b/static/stopwords/st
@@ -0,0 +1,31 @@
+a
+ba
+bane
+bona
+e
+ea
+eaba
+empa
+ena
+ha
+hae
+hape
+ho
+hore
+ka
+ke
+la
+le
+li
+me
+mo
+moo
+ne
+o
+oa
+re
+sa
+se
+tloha
+tsa
+tse
\ No newline at end of file
diff --git a/static/stopwords/sv b/static/stopwords/sv
new file mode 100644
index 0000000..e768342
--- /dev/null
+++ b/static/stopwords/sv
@@ -0,0 +1,418 @@
+aderton
+adertonde
+adjÃ¶
+aldrig
+alla
+allas
+allt
+alltid
+alltsÃ¥
+andra
+andras
+annan
+annat
+artonde
+artonn
+att
+av
+bakom
+bara
+behÃ¶va
+behÃ¶vas
+behÃ¶vde
+behÃ¶vt
+beslut
+beslutat
+beslutit
+bland
+blev
+bli
+blir
+blivit
+bort
+borta
+bra
+bÃ¤st
+bÃ¤ttre
+bÃ¥da
+bÃ¥das
+dag
+dagar
+dagarna
+dagen
+de
+del
+delen
+dem
+den
+denna
+deras
+dess
+dessa
+det
+detta
+dig
+din
+dina
+dit
+ditt
+dock
+dom
+du
+dÃ¤r
+dÃ¤rfÃ¶r
+dÃ¥
+e
+efter
+eftersom
+ej
+elfte
+eller
+elva
+emot
+en
+enkel
+enkelt
+enkla
+enligt
+ens
+er
+era
+ers
+ert
+ett
+ettusen
+fanns
+fem
+femte
+femtio
+femtionde
+femton
+femtonde
+fick
+fin
+finnas
+finns
+fjorton
+fjortonde
+fjÃ¤rde
+fler
+flera
+flesta
+fram
+framfÃ¶r
+frÃ¥n
+fyra
+fyrtio
+fyrtionde
+fÃ¥
+fÃ¥r
+fÃ¥tt
+fÃ¶ljande
+fÃ¶r
+fÃ¶re
+fÃ¶rlÃ¥t
+fÃ¶rra
+fÃ¶rsta
+genast
+genom
+gick
+gjorde
+gjort
+god
+goda
+godare
+godast
+gott
+gÃ¤lla
+gÃ¤ller
+gÃ¤llt
+gÃ¤rna
+gÃ¥
+gÃ¥r
+gÃ¥tt
+gÃ¶r
+gÃ¶ra
+ha
+hade
+haft
+han
+hans
+har
+heller
+hellre
+helst
+helt
+henne
+hennes
+hit
+hon
+honom
+hundra
+hundraen
+hundraett
+hur
+hÃ¤r
+hÃ¶g
+hÃ¶ger
+hÃ¶gre
+hÃ¶gst
+i
+ibland
+icke
+idag
+igen
+igÃ¥r
+imorgon
+in
+infÃ¶r
+inga
+ingen
+ingenting
+inget
+innan
+inne
+inom
+inte
+inuti
+ja
+jag
+jo
+ju
+just
+jÃ¤mfÃ¶rt
+kan
+kanske
+knappast
+kom
+komma
+kommer
+kommit
+kr
+kunde
+kunna
+kunnat
+kvar
+legat
+ligga
+ligger
+lika
+likstÃ¤lld
+likstÃ¤llda
+lilla
+lite
+liten
+litet
+lÃ¤nge
+lÃ¤ngre
+lÃ¤ngst
+lÃ¤tt
+lÃ¤ttare
+lÃ¤ttast
+lÃ¥ngsam
+lÃ¥ngsammare
+lÃ¥ngsammast
+lÃ¥ngsamt
+lÃ¥ngt
+lÃ¥t
+man
+med
+mej
+mellan
+men
+mer
+mera
+mest
+mig
+min
+mina
+mindre
+minst
+mitt
+mittemot
+mot
+mycket
+mÃ¥nga
+mÃ¥ste
+mÃ¶jlig
+mÃ¶jligen
+mÃ¶jligt
+mÃ¶jligtvis
+ned
+nederst
+nedersta
+nedre
+nej
+ner
+ni
+nio
+nionde
+nittio
+nittionde
+nitton
+nittonde
+nog
+noll
+nr
+nu
+nummer
+nÃ¤r
+nÃ¤sta
+nÃ¥gon
+nÃ¥gonting
+nÃ¥got
+nÃ¥gra
+nÃ¥n
+nÃ¥nting
+nÃ¥t
+nÃ¶dvÃ¤ndig
+nÃ¶dvÃ¤ndiga
+nÃ¶dvÃ¤ndigt
+nÃ¶dvÃ¤ndigtvis
+och
+ocksÃ¥
+ofta
+oftast
+olika
+olikt
+om
+oss
+pÃ¥
+rakt
+redan
+rÃ¤tt
+sa
+sade
+sagt
+samma
+sedan
+senare
+senast
+sent
+sex
+sextio
+sextionde
+sexton
+sextonde
+sig
+sin
+sina
+sist
+sista
+siste
+sitt
+sitta
+sju
+sjunde
+sjuttio
+sjuttionde
+sjutton
+sjuttonde
+sjÃ¤lv
+sjÃ¤tte
+ska
+skall
+skulle
+slutligen
+smÃ¥
+smÃ¥tt
+snart
+som
+stor
+stora
+stort
+stÃ¶rre
+stÃ¶rst
+sÃ¤ga
+sÃ¤ger
+sÃ¤mre
+sÃ¤mst
+sÃ¥
+sÃ¥dan
+sÃ¥dana
+sÃ¥dant
+ta
+tack
+tar
+tidig
+tidigare
+tidigast
+tidigt
+till
+tills
+tillsammans
+tio
+tionde
+tjugo
+tjugoen
+tjugoett
+tjugonde
+tjugotre
+tjugotvÃ¥
+tjungo
+tolfte
+tolv
+tre
+tredje
+trettio
+trettionde
+tretton
+trettonde
+tvÃ¥
+tvÃ¥hundra
+under
+upp
+ur
+ursÃ¤kt
+ut
+utan
+utanfÃ¶r
+ute
+va
+vad
+var
+vara
+varfÃ¶r
+varifrÃ¥n
+varit
+varje
+varken
+vars
+varsÃ¥god
+vart
+vem
+vems
+verkligen
+vi
+vid
+vidare
+viktig
+viktigare
+viktigast
+viktigt
+vilka
+vilkas
+vilken
+vilket
+vill
+vÃ¤l
+vÃ¤nster
+vÃ¤nstra
+vÃ¤rre
+vÃ¥r
+vÃ¥ra
+vÃ¥rt
+Ã¤n
+Ã¤nnu
+Ã¤r
+Ã¤ven
+Ã¥t
+Ã¥tminstone
+Ã¥tta
+Ã¥ttio
+Ã¥ttionde
+Ã¥ttonde
+Ã¶ver
+Ã¶vermorgon
+Ã¶verst
+Ã¶vre
\ No newline at end of file
diff --git a/static/stopwords/sw b/static/stopwords/sw
new file mode 100644
index 0000000..9d54c01
--- /dev/null
+++ b/static/stopwords/sw
@@ -0,0 +1,74 @@
+akasema
+alikuwa
+alisema
+baada
+basi
+bila
+cha
+chini
+hadi
+hapo
+hata
+hivyo
+hiyo
+huku
+huo
+ili
+ilikuwa
+juu
+kama
+karibu
+katika
+kila
+kima
+kisha
+kubwa
+kutoka
+kuwa
+kwa
+kwamba
+kwenda
+kwenye
+la
+lakini
+mara
+mdogo
+mimi
+mkubwa
+mmoja
+moja
+muda
+mwenye
+na
+naye
+ndani
+ng
+ni
+nini
+nonkungu
+pamoja
+pia
+sana
+sasa
+sauti
+tafadhali
+tena
+tu
+vile
+wa
+wakati
+wake
+walikuwa
+wao
+watu
+wengine
+wote
+ya
+yake
+yangu
+yao
+yeye
+yule
+za
+zaidi
+zake
\ No newline at end of file
diff --git a/static/stopwords/th b/static/stopwords/th
new file mode 100644
index 0000000..ed52946
--- /dev/null
+++ b/static/stopwords/th
@@ -0,0 +1,115 @@
+à¸à¸¥à¹à¸²à¸§
+à¸à¸§à¹à¸²
+à¸à¸±à¸
+à¸à¸±à¸
+à¸à¸²à¸£
+à¸à¹
+à¸à¹à¸­à¸
+à¸à¸à¸°
+à¸à¸­
+à¸à¸­à¸
+à¸à¸¶à¹à¸
+à¸à¸
+à¸à¸£à¸±à¹à¸
+à¸à¸§à¸²à¸¡
+à¸à¸·à¸­
+à¸à¸°
+à¸à¸±à¸
+à¸à¸²à¸
+à¸à¸¶à¸
+à¸à¹à¸§à¸
+à¸à¸¶à¹à¸
+à¸à¸±à¸
+à¸à¹à¸§à¸¢
+à¸à¹à¸²à¸
+à¸à¸±à¹à¸
+à¸à¸±à¹à¸à¹à¸à¹
+à¸à¸²à¸¡
+à¸à¹à¸­
+à¸à¹à¸²à¸
+à¸à¹à¸²à¸à¹
+à¸à¹à¸­à¸
+à¸à¸¶à¸
+à¸à¸¹à¸
+à¸à¹à¸²
+à¸à¸±à¹à¸
+à¸à¸±à¹à¸à¸à¸µà¹
+à¸à¸²à¸
+à¸à¸³
+à¸à¸³à¹à¸«à¹
+à¸à¸µà¹
+à¸à¸µà¹à¸ªà¸¸à¸
+à¸à¸¸à¸
+à¸à¸­à¸à¸à¸²à¸
+à¸à¸±à¸
+à¸à¸±à¹à¸
+à¸à¸³
+à¸à¸µà¹
+à¸à¹à¸²
+à¸à¸²à¸
+à¸à¸¥
+à¸à¹à¸²à¸
+à¸à¸
+à¸à¸£à¹à¸­à¸¡
+à¸¡à¸²
+à¸¡à¸²à¸
+à¸¡à¸µ
+à¸¢à¸±à¸
+à¸£à¸§à¸¡
+à¸£à¸°à¸«à¸§à¹à¸²à¸
+à¸£à¸±à¸
+à¸£à¸²à¸¢
+à¸£à¹à¸§à¸¡
+à¸¥à¸
+à¸§à¸±à¸
+à¸§à¹à¸²
+à¸ªà¸³à¸«à¸£à¸±à¸
+à¸ªà¸¸à¸
+à¸ªà¹à¸
+à¸ªà¹à¸§à¸
+à¸«à¸à¸¶à¹à¸
+à¸«à¸£à¸·à¸­
+à¸«à¸¥à¸±à¸
+à¸«à¸¥à¸±à¸à¸à¸²à¸
+à¸«à¸¥à¸²à¸¢
+à¸«à¸²à¸
+à¸­à¸¢à¸²à¸
+à¸­à¸¢à¸¹à¹
+à¸­à¸¢à¹à¸²à¸
+à¸­à¸­à¸
+à¸­à¸°à¹à¸£
+à¸­à¸²à¸
+à¸­à¸µà¸
+à¹à¸à¸²
+à¹à¸à¹à¸²
+à¹à¸à¸¢
+à¹à¸à¸à¸²à¸°
+à¹à¸à¹à¸
+à¹à¸à¸µà¸¢à¸§
+à¹à¸à¸µà¸¢à¸§à¸à¸±à¸
+à¹à¸à¸·à¹à¸­à¸à¸à¸²à¸
+à¹à¸à¸´à¸
+à¹à¸à¸´à¸à¹à¸à¸¢
+à¹à¸à¹à¸
+à¹à¸à¹à¸à¸à¸²à¸£
+à¹à¸à¸£à¸²à¸°
+à¹à¸à¸·à¹à¸­
+à¹à¸¡à¸·à¹à¸­
+à¹à¸£à¸²
+à¹à¸£à¸´à¹à¸¡
+à¹à¸¥à¸¢
+à¹à¸«à¹à¸
+à¹à¸­à¸
+à¹à¸à¹
+à¹à¸à¸
+à¹à¸£à¸
+à¹à¸¥à¸°
+à¹à¸¥à¹à¸§
+à¹à¸«à¹à¸
+à¹à¸à¸¢
+à¹à¸
+à¹à¸«à¹
+à¹à¸à¹
+à¹à¸
+à¹à¸¡à¹
+à¹à¸§à¹
\ No newline at end of file
diff --git a/static/stopwords/tl b/static/stopwords/tl
new file mode 100644
index 0000000..1fb0a91
--- /dev/null
+++ b/static/stopwords/tl
@@ -0,0 +1,147 @@
+akin
+aking
+ako
+alin
+am
+amin
+aming
+ang
+ano
+anumang
+apat
+at
+atin
+ating
+ay
+bababa
+bago
+bakit
+bawat
+bilang
+dahil
+dalawa
+dapat
+din
+dito
+doon
+gagawin
+gayunman
+ginagawa
+ginawa
+ginawang
+gumawa
+gusto
+habang
+hanggang
+hindi
+huwag
+iba
+ibaba
+ibabaw
+ibig
+ikaw
+ilagay
+ilalim
+ilan
+inyong
+isa
+isang
+itaas
+ito
+iyo
+iyon
+iyong
+ka
+kahit
+kailangan
+kailanman
+kami
+kanila
+kanilang
+kanino
+kanya
+kanyang
+kapag
+kapwa
+karamihan
+katiyakan
+katulad
+kaya
+kaysa
+ko
+kong
+kulang
+kumuha
+kung
+laban
+lahat
+lamang
+likod
+lima
+maaari
+maaaring
+maging
+mahusay
+makita
+marami
+marapat
+masyado
+may
+mayroon
+mga
+minsan
+mismo
+mula
+muli
+na
+nabanggit
+naging
+nagkaroon
+nais
+nakita
+namin
+napaka
+narito
+nasaan
+ng
+ngayon
+ni
+nila
+nilang
+nito
+niya
+niyang
+noon
+o
+pa
+paano
+pababa
+paggawa
+pagitan
+pagkakaroon
+pagkatapos
+palabas
+pamamagitan
+panahon
+pangalawa
+para
+paraan
+pareho
+pataas
+pero
+pumunta
+pumupunta
+sa
+saan
+sabi
+sabihin
+sarili
+sila
+sino
+siya
+tatlo
+tayo
+tulad
+tungkol
+una
+walang
\ No newline at end of file
diff --git a/static/stopwords/tr b/static/stopwords/tr
new file mode 100644
index 0000000..9fb17f2
--- /dev/null
+++ b/static/stopwords/tr
@@ -0,0 +1,504 @@
+acaba
+acep
+adamakÄ±llÄ±
+adeta
+ait
+altmÃ½Ã¾
+altmÄ±Å
+altÃ½
+altÄ±
+ama
+amma
+anca
+ancak
+arada
+artÃ½k
+aslÄ±nda
+aynen
+ayrÄ±ca
+az
+aÃ§Ä±kÃ§a
+aÃ§Ä±kÃ§asÄ±
+bana
+bari
+bazen
+bazÃ½
+bazÄ±
+baÅkasÄ±
+baÅ£ka
+belki
+ben
+benden
+beni
+benim
+beri
+beriki
+beÃ¾
+beÅ
+beÅ£
+bilcÃ¼mle
+bile
+bin
+binaen
+binaenaleyh
+bir
+biraz
+birazdan
+birbiri
+birden
+birdenbire
+biri
+birice
+birileri
+birisi
+birkaÃ§
+birkaÃ§Ä±
+birkez
+birlikte
+birÃ§ok
+birÃ§oÄu
+birÃ¾ey
+birÃ¾eyi
+birÅey
+birÅeyi
+birÅ£ey
+bitevi
+biteviye
+bittabi
+biz
+bizatihi
+bizce
+bizcileyin
+bizden
+bize
+bizi
+bizim
+bizimki
+bizzat
+boÅuna
+bu
+buna
+bunda
+bundan
+bunlar
+bunlarÄ±
+bunlarÄ±n
+bunu
+bunun
+buracÄ±kta
+burada
+buradan
+burasÄ±
+bÃ¶yle
+bÃ¶ylece
+bÃ¶ylecene
+bÃ¶ylelikle
+bÃ¶ylemesine
+bÃ¶ylesine
+bÃ¼sbÃ¼tÃ¼n
+bÃ¼tÃ¼n
+cuk
+cÃ¼mlesi
+da
+daha
+dahi
+dahil
+dahilen
+daima
+dair
+dayanarak
+de
+defa
+dek
+demin
+demincek
+deminden
+denli
+derakap
+derhal
+derken
+deÄil
+deÄil
+deÄin
+diye
+diÄer
+diÄer
+diÄeri
+doksan
+dokuz
+dolayÄ±
+dolayÄ±sÄ±yla
+doÄru
+dÃ¶rt
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+elbet
+elbette
+elli
+emme
+en
+enikonu
+epey
+epeyce
+epeyi
+esasen
+esnasÄ±nda
+etmesi
+etraflÄ±
+etraflÄ±ca
+etti
+ettiÄi
+ettiÄini
+evleviyetle
+evvel
+evvela
+evvelce
+evvelden
+evvelemirde
+evveli
+eÄer
+eÄer
+fakat
+filanca
+gah
+gayet
+gayetle
+gayri
+gayrÄ±
+gelgelelim
+gene
+gerek
+gerÃ§i
+geÃ§ende
+geÃ§enlerde
+gibi
+gibilerden
+gibisinden
+gine
+gÃ¶re
+gÄ±rla
+hakeza
+halbuki
+halen
+halihazÄ±rda
+haliyle
+handiyse
+hangi
+hangisi
+hani
+hariÃ§
+hasebiyle
+hasÄ±lÄ±
+hatta
+hele
+hem
+henÃ¼z
+hep
+hepsi
+her
+herhangi
+herkes
+herkesin
+hiÃ§
+hiÃ§bir
+hiÃ§biri
+hoÅ
+hulasaten
+iken
+iki
+ila
+ile
+ilen
+ilgili
+ilk
+illa
+illaki
+imdi
+indinde
+inen
+insermi
+ise
+ister
+itibaren
+itibariyle
+itibarÄ±yla
+iyi
+iyice
+iyicene
+iÃ§in
+iÅ
+iÅte
+iÅ£te
+kadar
+kaffesi
+kah
+kala
+kanÃ½mca
+karÅÄ±n
+katrilyon
+kaynak
+kaÃ§Ä±
+kelli
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kere
+kez
+keza
+kezalik
+keÅke
+keÅ£ke
+ki
+kim
+kimden
+kime
+kimi
+kimisi
+kimse
+kimsecik
+kimsecikler
+kÃ¼lliyen
+kÃ½rk
+kÃ½saca
+kÄ±rk
+kÄ±saca
+lakin
+leh
+lÃ¼tfen
+maada
+madem
+mademki
+mamafih
+mebni
+meÄer
+meÄer
+meÄerki
+meÄerse
+milyar
+milyon
+mu
+mÃ¼
+mÃ½
+mÄ±
+nasÃ½l
+nasÄ±l
+nasÄ±lsa
+nazaran
+naÅi
+ne
+neden
+nedeniyle
+nedenle
+nedense
+nerde
+nerden
+nerdeyse
+nere
+nerede
+nereden
+neredeyse
+neresi
+nereye
+netekim
+neye
+neyi
+neyse
+nice
+nihayet
+nihayetinde
+nitekim
+niye
+niÃ§in
+o
+olan
+olarak
+oldu
+olduklarÄ±nÄ±
+oldukÃ§a
+olduÄu
+olduÄunu
+olmadÄ±
+olmadÄ±ÄÄ±
+olmak
+olmasÄ±
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+onca
+onculayÄ±n
+onda
+ondan
+onlar
+onlardan
+onlari
+onlarÃ½n
+onlarÄ±
+onlarÄ±n
+onu
+onun
+oracÄ±k
+oracÄ±kta
+orada
+oradan
+oranca
+oranla
+oraya
+otuz
+oysa
+oysaki
+pek
+pekala
+peki
+pekÃ§e
+peyderpey
+raÄmen
+sadece
+sahi
+sahiden
+sana
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+sonra
+sonradan
+sonralarÄ±
+sonunda
+tabii
+tam
+tamam
+tamamen
+tamamÄ±yla
+tarafÄ±ndan
+tek
+trilyon
+tÃ¼m
+var
+vardÄ±
+vasÄ±tasÄ±yla
+ve
+velev
+velhasÄ±l
+velhasÄ±lÄ±kelam
+veya
+veyahut
+ya
+yahut
+yakinen
+yakÄ±nda
+yakÄ±ndan
+yakÄ±nlarda
+yalnÄ±z
+yalnÄ±zca
+yani
+yapacak
+yapmak
+yaptÄ±
+yaptÄ±klarÄ±
+yaptÄ±ÄÄ±
+yaptÄ±ÄÄ±nÄ±
+yapÄ±lan
+yapÄ±lmasÄ±
+yapÄ±yor
+yedi
+yeniden
+yenilerde
+yerine
+yetmiÃ¾
+yetmiÅ
+yetmiÅ£
+yine
+yirmi
+yok
+yoksa
+yoluyla
+yÃ¼z
+yÃ¼zÃ¼nden
+zarfÄ±nda
+zaten
+zati
+zira
+Ã§abuk
+Ã§abukÃ§a
+Ã§eÅitli
+Ã§ok
+Ã§oklarÄ±
+Ã§oklarÄ±nca
+Ã§okluk
+Ã§oklukla
+Ã§okÃ§a
+Ã§oÄu
+Ã§oÄun
+Ã§oÄunca
+Ã§oÄunlukla
+Ã§Ã¼nkÃ¼
+Ã¶bÃ¼r
+Ã¶bÃ¼rkÃ¼
+Ã¶bÃ¼rÃ¼
+Ã¶nce
+Ã¶nceden
+Ã¶nceleri
+Ã¶ncelikle
+Ã¶teki
+Ã¶tekisi
+Ã¶yle
+Ã¶ylece
+Ã¶ylelikle
+Ã¶ylemesine
+Ã¶z
+Ã¼zere
+Ã¼Ã§
+Ã¾ey
+Ã¾eyden
+Ã¾eyi
+Ã¾eyler
+Ã¾u
+Ã¾una
+Ã¾unda
+Ã¾undan
+Ã¾unu
+Åayet
+Åey
+Åeyden
+Åeyi
+Åeyler
+Åu
+Åuna
+ÅuncacÄ±k
+Åunda
+Åundan
+Åunlar
+ÅunlarÄ±
+Åunu
+Åunun
+Åura
+ÅuracÄ±k
+ÅuracÄ±kta
+ÅurasÄ±
+ÅÃ¶yle
+Å£ayet
+Å£imdi
+Å£u
+Å£Ã¶yle
\ No newline at end of file
diff --git a/static/stopwords/uk b/static/stopwords/uk
new file mode 100644
index 0000000..8fb7ef6
--- /dev/null
+++ b/static/stopwords/uk
@@ -0,0 +1,73 @@
+Ð°Ð²Ð¶ÐµÐ¶
+Ð°Ð´Ð¶Ðµ
+Ð°Ð»Ðµ
+Ð±
+Ð±ÐµÐ·
+Ð±ÑÐ²
+Ð±ÑÐ»Ð°
+Ð±ÑÐ»Ð¸
+Ð±ÑÐ»Ð¾
+Ð±ÑÑÐ¸
+Ð±ÑÐ»ÑÑ
+Ð²Ð°Ð¼
+Ð²Ð°Ñ
+Ð²ÐµÑÑ
+Ð²Ð·Ð´Ð¾Ð²Ð¶
+Ð²Ð¸
+Ð²Ð½Ð¸Ð·
+Ð²Ð½Ð¸Ð·Ñ
+Ð²Ð¾Ð½Ð°
+Ð²Ð¾Ð½Ð¸
+Ð²Ð¾Ð½Ð¾
+Ð²ÑÐµ
+Ð²ÑÐµÑÐµÐ´Ð¸Ð½Ñ
+Ð²ÑÑÑ
+Ð²ÑÐ´
+Ð²ÑÐ½
+Ð´Ð°
+Ð´Ð°Ð²Ð°Ð¹
+Ð´Ð°Ð²Ð°ÑÐ¸
+Ð´Ðµ
+Ð´ÐµÑÐ¾
+Ð´Ð»Ñ
+Ð´Ð¾
+Ð·
+Ð·Ð°Ð²Ð¶Ð´Ð¸
+Ð·Ð°Ð¼ÑÑÑÑ
+Ð¹
+ÐºÐ¾Ð»Ð¸
+Ð»ÐµÐ´Ð²Ðµ
+Ð¼Ð°Ð¹Ð¶Ðµ
+Ð¼Ð¸
+Ð½Ð°Ð²ÐºÐ¾Ð»Ð¾
+Ð½Ð°Ð²ÑÑÑ
+Ð½Ð°Ð¼
+Ð¾Ñ
+Ð¾ÑÐ¶Ðµ
+Ð¾ÑÐ¾Ð¶
+Ð¿Ð¾Ð·Ð°
+Ð¿ÑÐ¾
+Ð¿ÑÐ´
+ÑÐ°
+ÑÐ°Ðº
+ÑÐ°ÐºÐ¸Ð¹
+ÑÐ°ÐºÐ¾Ð¶
+ÑÐµ
+ÑÐ¸
+ÑÐ¾Ð±ÑÐ¾
+ÑÐ¾Ð¶
+ÑÐ¾ÑÐ¾
+ÑÐ¾ÑÐ°
+ÑÐµ
+ÑÐµÐ¹
+ÑÐ¸
+ÑÐ¾Ð³Ð¾
+ÑÐ¾
+ÑÐº
+ÑÐºÐ¸Ð¹
+ÑÐºÐ¾Ñ
+Ñ
+ÑÐ·
+ÑÐ½ÑÐ¸Ñ
+ÑÑ
+ÑÑ
\ No newline at end of file
diff --git a/static/stopwords/ur b/static/stopwords/ur
new file mode 100644
index 0000000..3b9eef5
--- /dev/null
+++ b/static/stopwords/ur
@@ -0,0 +1,517 @@
+Ø¢Ø¦Û
+Ø¢Ø¦Û
+Ø¢Ø¬
+Ø¢Ø®Ø±
+Ø¢Ø®Ø±Ú©Ø¨Ø±
+Ø¢Ø¯ÙÛ
+Ø¢ÙØ¨
+Ø¢Ù¹Ú¾
+Ø¢ÛØ¨
+Ø§Ø©
+Ø§Ø®Ø¨Ø²Øª
+Ø§Ø®ØªØªØ¨Ù
+Ø§Ø¯Ú¾Ø±
+Ø§Ø±Ø¯
+Ø§Ø±Ø¯Ú¯Ø±Ø¯
+Ø§Ø±Ú©Ø¨Ù
+Ø§Ø´
+Ø§Ø¶ØªØ¹ÙØ¨Ù
+Ø§Ø¶ØªØ¹ÙØ¨ÙØ§Øª
+Ø§Ø¶Ø·Ø±Ø°
+Ø§Ø¶Ú©Ø¨
+Ø§Ø¶Ú©Û
+Ø§Ø¶Ú©Û
+Ø§Ø·Ø±Ø§Ù
+Ø§ØºÛØ¨
+Ø§ÙØ±Ø§Ø¯
+Ø§ÙÚ¯
+Ø§ÙØ±
+Ø§ÙÙÚØ¨
+Ø§ÙÙÚØ¨Ø¦Û
+Ø§ÙÙÚÛ
+Ø§ÙÙÚÛ
+Ø§Ù
+Ø§Ù
+Ø§ÙØ°Ø±
+Ø§ÙÛÛÚº
+Ø§Ù¹Ú¾Ø¨ÙØ¨
+Ø§Ù¾ÙØ¨
+Ø§Ù¾ÙÛ
+Ø§ÚÚ¾Ø¨
+Ø§ÚÚ¾Û
+Ø§ÚÚ¾Û
+Ø§Ú©Ø«Ø±
+Ø§Ú©Ù¹Ú¾Ø¨
+Ø§Ú©Ù¹Ú¾Û
+Ø§Ú©Ù¹Ú¾Û
+Ø§Ú©ÛÙØ§
+Ø§Ú©ÛÙÛ
+Ø§Ú©ÛÙÛ
+Ø§Ú¯Ø±ÚÛ
+Ø§ÛÙ
+Ø§ÛØ·Û
+Ø§ÛÚ©
+Ø¨
+Øª
+ØªØ¨Ø²Ù
+ØªØª
+ØªØ±
+ØªØ±ØªÛØª
+ØªØ±ÛÙ
+ØªØ¹Ø°Ø§Ø¯
+ØªÙ
+ØªÙ
+ØªÙØ¨Ù
+ØªÙÛÛ
+ØªÙÛÛÚº
+ØªÙÛØ¨
+ØªÚ©
+ØªÚ¾Ø¨
+ØªÚ¾ÙÚØ§
+ØªÚ¾ÙÚÛ
+ØªÚ¾ÙÚÛ
+ØªÚ¾Û
+ØªÚ¾Û
+ØªÛÙ
+Ø«Ø¨
+Ø«Ø¨Ø¦ÛÚº
+Ø«Ø¨ØªØ±ØªÛØª
+Ø«Ø¨Ø±Û
+Ø«Ø¨Ø±Û
+Ø«Ø¨Ø¹Ø«
+Ø«Ø¨ÙØ§
+Ø«Ø¨ÙØªØ±ØªÛØª
+Ø«Ø¨ÛØ±
+Ø«Ø¯Ø¨Ø¦Û
+Ø«Ø±Ø¢Úº
+Ø«Ø±Ø§Úº
+Ø«Ø±Ø´
+Ø«Ø¹Ø°
+Ø«ØºÛØ±
+Ø«ÙÙØ°
+Ø«ÙÙØ°ÙØ«Ø¨ÙØ§
+Ø«ÙÚ©Û
+Ø«Ù
+Ø«ÙØ¨
+Ø«ÙØ¨Ø±ÛØ¨
+Ø«ÙØ¨Ø±ÛÛ
+Ø«ÙØ¨Ø±ÛÛ
+Ø«ÙØ¨ÙØ¨
+Ø«ÙØ°
+Ø«ÙØ°Ú©Ø±Ù
+Ø«ÙØ°Ú©Ø±ÙØ¨
+Ø«ÙØ°Û
+Ø«ÚØ§
+Ø«ÚÙÚº
+Ø«ÚÛ
+Ø«ÚÛ
+Ø«Ú¾Ø±
+Ø«Ú¾Ø±Ø§
+Ø«Ú¾Ø±Ø§ÛÙØ§
+Ø«Ú¾Ø±Ù¾ÙØ±
+Ø«Ú¾Û
+Ø«ÛØª
+Ø«ÛØªØ±
+Ø«ÛØªØ±Û
+Ø«ÛØªØ±ÛÙ
+Ø«ÛÚ
+Ø¬
+Ø®Ø¨
+Ø®Ø¨Ø±ÛØ¨
+Ø®Ø¨Ø±ÛÛ
+Ø®Ø¨Ø±ÛÛ
+Ø®Ø¨ÙÙØ¸
+Ø®Ø¨ÙØ¨
+Ø®Ø¨ÙØªØ¨
+Ø®Ø¨ÙØªÛ
+Ø®Ø¨ÙØªÛ
+Ø®Ø¨ÙÙØ¨
+Ø®Øª
+Ø®ØªÙ
+Ø®Ø¬Ú©Û
+Ø®Øµ
+Ø®Ø·Ø·Ø±Ø°
+Ø®ÙØ°Û
+Ø®Ù
+Ø®ÙØ§Ù
+Ø®ÙÙÛÛ
+Ø®ÙÚ©Û
+Ø®ÙØ¨Ø©
+Ø®Ú¯Û
+Ø®Ú¯ÛÙÚº
+Ø®Ú¯ÛÛÚº
+Ø®ÛØ·Ø¨
+Ø®ÛØ·Ø¨Ú©Û
+Ø¯Ø±
+Ø¯Ø±Ø®Ø¨Øª
+Ø¯Ø±Ø®Û
+Ø¯Ø±Ø®Û
+Ø¯Ø±Ø²ÙÛÙØª
+Ø¯Ø±Ø¶Øª
+Ø¯Ø´
+Ø¯ÙØ¹Û
+Ø¯ÙÚØ·Ù¾
+Ø¯ÙÚØ·Ù¾Û
+Ø¯ÙÚØ·Ù¾ÛØ¨Úº
+Ø¯Ù
+Ø¯ÙØ±
+Ø¯ÙØ±Ø§Ù
+Ø¯ÙØ¶Ø±Ø§
+Ø¯ÙØ¶Ø±ÙÚº
+Ø¯ÙØ¶Ø±Û
+Ø¯ÙØ¶Ø±Û
+Ø¯ÙÙÙÚº
+Ø¯Ú©Ú¾Ø¨Ø¦ÛÚº
+Ø¯Ú©Ú¾Ø¨ØªØ¨
+Ø¯Ú©Ú¾Ø¨ØªÛ
+Ø¯Ú©Ú¾Ø¨ØªÛ
+Ø¯Ú©Ú¾Ø¨Ù
+Ø¯Ú©Ú¾Ø¨ÙØ¨
+Ø¯Ú©Ú¾Ø¨ÛØ¨
+Ø¯Û
+Ø¯ÛØ¨
+Ø¯ÛØªØ¨
+Ø¯ÛØªÛ
+Ø¯ÛØªÛ
+Ø¯ÛØ±
+Ø¯ÛÙØ¨
+Ø¯ÛÚ©Ú¾Ù
+Ø¯ÛÚ©Ú¾ÙØ¨
+Ø¯ÛÚ©Ú¾Û
+Ø¯ÛÚ©Ú¾ÛÚº
+Ø¯Û
+Ø±
+Ø±Ø§Ø¶ØªÙÚº
+Ø±Ø§Ø¶ØªÛ
+Ø±Ø§Ø¶ØªÛ
+Ø±Ø±ÛØ¹Û
+Ø±Ø±ÛØ¹Û
+Ø±Ú©Ù
+Ø±Ú©Ú¾
+Ø±Ú©Ú¾Ø¨
+Ø±Ú©Ú¾ØªØ¨
+Ø±Ú©Ú¾ØªØ¨ÛÙÚº
+Ø±Ú©Ú¾ØªÛ
+Ø±Ú©Ú¾ØªÛ
+Ø±Ú©Ú¾Û
+Ø±Ú©Ú¾Û
+Ø±ÛØ¨
+Ø±ÛÛ
+Ø±ÛÛ
+Ø²
+Ø²Ø¨ØµÙ
+Ø²Ø¨Ø¶Ø±
+Ø²Ø¨Ù
+Ø²Ø¨ÙØ§Øª
+Ø²Ø¨ÙÛÛ
+Ø²ØµÙÚº
+Ø²ØµÛ
+Ø²ØµÛ
+Ø²ÙØ¨Ø¦Ù
+Ø²ÙÛØªÛÚº
+Ø²ÙÛÙØª
+Ø²Ú©Ù
+Ø²Ú©ÙÛÛ
+Ø²ÛØ¨Ø¯Ù
+ØµØ¨Ù
+ØµØ³ÛØ±
+ØµÙØ±
+ØµÙØ±Øª
+ØµÙØ±ØªØ³Ø¨Ù
+ØµÙØ±ØªÙÚº
+ØµÙØ±ØªÛÚº
+Ø¶
+Ø¶Ø¨Øª
+Ø¶Ø¨ØªÚ¾
+Ø¶Ø¨Ø¯Ù
+Ø¶Ø¨Ø±Ø§
+Ø¶Ø¨Ø±Û
+Ø¶Ø¨Ù
+Ø¶Ø¨ÙÙÚº
+Ø¶Øª
+Ø¶Ø±ÙØ±
+Ø¶Ø±ÙØ±Øª
+Ø¶Ø±ÙØ±Û
+Ø¶ÙØ·ÙÛ
+Ø¶ÙÚ
+Ø¶ÙÚØ¨
+Ø¶ÙÚØªØ¨
+Ø¶ÙÚØªÛ
+Ø¶ÙÚØªÛ
+Ø¶ÙÚÙ
+Ø¶ÙÚÙØ¨
+Ø¶ÙÚÛ
+Ø¶ÙÚÛÚº
+Ø¶Ú©Ø¨
+Ø¶Ú©ØªØ¨
+Ø¶Ú©ØªÛ
+Ø¶Ú©ØªÛ
+Ø¶Ú©ÙØ¨
+Ø¶Ú©Û
+Ø¶Ú©Û
+Ø¶ÛØ°Ú¾Ø¨
+Ø¶ÛØ°Ú¾Û
+Ø¶ÛØ°Ú¾Û
+Ø¶ÛÚ©ÙÚ
+Ø¶Û
+Ø·Ø±Ù
+Ø·Ø±ÛÙ
+Ø·Ø±ÛÙÙÚº
+Ø·Ø±ÛÙÛ
+Ø·Ø±ÛÙÛ
+Ø·ÙØ±
+Ø·ÙØ±Ù¾Ø±
+Ø¸Ø¨ÛØ±
+Ø¹
+Ø¹Ø°Ø¯
+Ø¹Ø¸ÛÙ
+Ø¹ÙØ§ÙÙÚº
+Ø¹ÙØ§ÙÛ
+Ø¹ÙØ§ÙÛ
+Ø¹ÙØ§ÙÙ
+Ø¹ÙÙÙÛ
+ØºØ¨ÛØ°
+ØºØ®Øµ
+ØºØ°
+ØºØ±ÙØ¹
+ØºØ±ÙØ¹Ø¨Øª
+ØºÛ
+ÙØ±Ø¯
+ÙÛ
+Ù
+ÙØ¬Ù
+ÙØ¬ÛÙÛ
+ÙØ·Ù
+ÙØ¦Û
+ÙØ§
+ÙØ§Ø²ÙÛ
+ÙÙ
+ÙÙØ¬Ø¨
+ÙÙØ¬Û
+ÙÙØ¬Û
+ÙÙØ³Ø¨Øª
+ÙÙØ³Û
+ÙÙÚ¯
+ÙÙÚ¯ÙÚº
+ÙÚÚ©Ù¾Ù
+ÙÚ¯ØªØ¨
+ÙÚ¯ØªÛ
+ÙÚ¯ØªÛ
+ÙÚ¯ÙØ¨
+ÙÚ¯Û
+ÙÚ¯ÛÚº
+ÙÚ¯Û
+ÙÛ
+ÙÛØ¨
+ÙÛÙØ¨
+ÙÛÚº
+ÙÛ
+Ù
+ÙØªØ¹ÙÙ
+ÙØ®ØªÙÙ
+ÙØ³ØªØ±Ù
+ÙØ³ØªØ±ÙÛ
+ÙØ³Ø·ÙØ´
+ÙØ³ÛØ°
+ÙØ·Ø¦ÙÛ
+ÙØ·Ø¦ÙÛ
+ÙØ·Ø¨Ø¦Ù
+ÙØ·ØªØ¹ÙÙ
+ÙØ·ÙÙ
+ÙØ¹ÙÙÙ
+ÙØ»ØªÙÙ
+ÙÙØ§
+ÙÙÚ©Ù
+ÙÙÚ©ÙØ¨Øª
+ÙÙÚ©ÙÛ
+ÙÙØ¨Ø¶Øª
+ÙÚØ§
+ÙÚÙØ¨
+ÙÚÛ
+ÙÚ©ÙÙ
+ÙÚ¯Ø±
+ÙÛØ±Ø«Ø¨Ù
+ÙÛØ±Ø§
+ÙÛØ±Û
+ÙÛØ±Û
+ÙÛÚº
+Ù
+ÙØ§Ø±
+ÙØ§ÙÛ
+ÙÙ
+ÙØ¦Û
+ÙØ¦Û
+ÙØ¨
+ÙØ¨Ù¾Ø·ÙØ°
+ÙØ¨Ú¯Ø³ÛØ±
+ÙØ·Ø¬Øª
+ÙÙØ·Û
+ÙÙ
+ÙÙØ®ÙØ§Ù
+ÙÚ©Ø¨ÙÙØ¨
+ÙÚ©ØªÛ
+ÙÛ
+ÙÛÛÚº
+ÙÛØ¨
+ÙÛ
+Ù Ø¢Ø´
+Ù¹Ú¾ÛÚ©
+Ù¾Ø¨Ø¦Û
+Ù¾Ø¨Ø´
+Ù¾Ø¨ÙØ¨
+Ù¾Ø¨ÙÚ
+Ù¾Ø±
+Ù¾Ø±Ø§ÙØ¨
+Ù¾Ø·ÙØ°
+Ù¾Ù
+Ù¾ÙØ±Ø§
+Ù¾ÙÚÚ¾Ø¨
+Ù¾ÙÚÚ¾ØªØ¨
+Ù¾ÙÚÚ¾ØªÛ
+Ù¾ÙÚÚ¾ØªÛ
+Ù¾ÙÚÚ¾Ù
+Ù¾ÙÚÚ¾ÙÚº
+Ù¾ÙÚÚ¾ÙØ¨
+Ù¾ÙÚÚ¾ÛÚº
+Ù¾ÚÚ¾ÙØ§
+Ù¾Ú¾Ø±
+Ù¾ÛÙØ§
+Ù¾ÛÙÛ
+Ù¾ÛÙÛØ¶Û
+Ù¾ÛÙÛØ¶Û
+Ù¾ÛÙÛØ¶ÛÛÛ
+Ù¾ÛØ¹
+ÚØ¨Ø±
+ÚØ¨ÛØ¨
+ÚØ¨ÛÙØ¨
+ÚØ¨ÛÛ
+ÚÙØ§
+ÚÙÙ
+ÚÙÛÚº
+ÚÙÛ
+ÚÚ©Ø¨
+ÚÚ©Û
+ÚÚ©ÛÚº
+ÚÚ©Û
+ÚÚ¾ÙÙ¹Ø¨
+ÚÚ¾ÙÙ¹ÙÚº
+ÚÚ¾ÙÙ¹Û
+ÚÚ¾ÙÙ¹Û
+ÚÚ¾Û
+ÚÛØ³ÛÚº
+ÚÚ¾ÙÙÚØ§
+ÚÚ¾ÙÙÚÙÛØ¨
+ÚÚ¾ÙÙÚÙ
+ÚÚ¾ÙÙÚÙØ¨
+ÚÚ¾ÙÙÚÛ
+ÚÚ¾ÙÙÚÛÚº
+Ú©
+Ú©Ø¦Û
+Ú©Ø¦Û
+Ú©Ø¨
+Ú©Ø¨ÙÛ
+Ú©Ø¨Ù
+Ú©Øª
+Ú©Ø¬Ú¾Û
+Ú©Ø±Ø§
+Ú©Ø±ØªØ¨
+Ú©Ø±ØªØ¨ÛÙÚº
+Ú©Ø±ØªÛ
+Ú©Ø±ØªÛ
+Ú©Ø±ØªÛÛÙ
+Ú©Ø±Ø±ÛØ¨
+Ú©Ø±Ø±ÛÛ
+Ú©Ø±Ø±ÛÛ
+Ú©Ø±Ù
+Ú©Ø±ÙØ¨
+Ú©Ø±ÛÚº
+Ú©Ø±Û
+Ú©Ø·Û
+Ú©Ù
+Ú©Ù
+Ú©ÙØ¦Û
+Ú©ÙØªØ±
+Ú©ÙØ±Ø§
+Ú©ÙØ±ÙÚº
+Ú©ÙØ±Ù
+Ú©ÙØ±Û
+Ú©ÙØ·Ù
+Ú©ÙÙ
+Ú©ÙÙØ·Ø¨
+Ú©ÙÙØ·Û
+Ú©ÙÙØ·Û
+Ú©Ú¾ÙÙØ§
+Ú©Ú¾ÙÙÙ
+Ú©Ú¾ÙÙÙØ¨
+Ú©Ú¾ÙÙÛ
+Ú©Ú¾ÙÙÛÚº
+Ú©Ú¾ÙÙÛ
+Ú©Û
+Ú©ÛØ¨
+Ú©ÛØªØ¨
+Ú©ÛØªÛ
+Ú©ÛØªÛ
+Ú©ÛÙ
+Ú©ÛÙÚº
+Ú©ÛÙØ¨
+Ú©ÛÛ
+Ú©ÛÛÚº
+Ú©ÛÛ
+Ú©Û
+Ú©ÛØ¨
+Ú©ÛØ·Ø¨
+Ú©ÛØ·Ø±Ù
+Ú©ÛØ·Û
+Ú©ÛÙØ¦Û
+Ú©ÛÙÙÚ©Û
+Ú©ÛÙÚº
+Ú©ÛÛ
+Ú©Û
+Ú©ÛØ«Ø¹Ø°
+Ú©ÛØ±Ø±ÛØ¹Û
+Ú¯Ø¦Û
+Ú¯Ø¦Û
+Ú¯Ø¨
+Ú¯Ø±Ø¯
+Ú¯Ø±ÙÙ
+Ú¯Ø±ÙÙ¾
+Ú¯Ø±ÙÛÙÚº
+Ú¯ÙØªÛ
+Ú¯Û
+Ú¯ÛØ¨
+Ú¯Û
+ÛØ±
+ÛÙ
+ÛÙ
+ÛÙØ¦Û
+ÛÙØ¦Û
+ÛÙØ§
+ÛÙØ¨Ø±Ø§
+ÛÙØ¨Ø±Û
+ÛÙØ¨Ø±Û
+ÛÙØªØ¨
+ÛÙØªÛ
+ÛÙØªÛ
+ÛÙØ±ÛØ¨
+ÛÙØ±ÛÛ
+ÛÙØ±ÛÛ
+ÛÙØ¶Ú©ØªØ¨
+ÛÙØ¶Ú©ØªÛ
+ÛÙØ¶Ú©ØªÛ
+ÛÙÙØ¨
+ÛÙÙÛ
+ÛÙÙÛ
+ÛÙÚÚ©Ø¨
+ÛÙÚÚ©Û
+ÛÙÚÚ©Û
+ÛÙÚ¯Ø¦Û
+ÛÙÚ¯Ø¦Û
+ÛÙÚ¯ÛØ¨
+ÛÙÚº
+ÛÛ
+ÛÛÚº
+ÛÛ
+Û
+ÛÙÛÙÛ
+ÛÛ
+ÛÛØ¨Úº
\ No newline at end of file
diff --git a/static/stopwords/vi b/static/stopwords/vi
new file mode 100644
index 0000000..f480ff8
--- /dev/null
+++ b/static/stopwords/vi
@@ -0,0 +1,645 @@
+a ha
+a-lÃ´
+ai
+ai ai
+ai náº¥y
+alÃ´
+amen
+anh
+bao giá»
+bao lÃ¢u
+bao nhiÃªu
+bao náº£
+bay biáº¿n
+biáº¿t
+biáº¿t bao
+biáº¿t bao nhiÃªu
+biáº¿t chá»«ng nÃ o
+biáº¿t máº¥y
+biáº¿t ÄÃ¢u
+biáº¿t ÄÃ¢u chá»«ng
+biáº¿t ÄÃ¢u Äáº¥y
+bÃ 
+bÃ i
+bÃ¡c
+bÃ¢y báº©y
+bÃ¢y chá»«
+bÃ¢y giá»
+bÃ¢y nhiÃªu
+bÃ¨n
+bÃ©ng
+bÃ´ng
+báº¡n
+báº£n
+báº¥t chá»£t
+báº¥t cá»©
+báº¥t giÃ¡c
+báº¥t kÃ¬
+báº¥t ká»
+báº¥t ká»³
+báº¥t luáº­n
+báº¥t nhÆ°á»£c
+báº¥t quÃ¡
+báº¥t thÃ¬nh lÃ¬nh
+báº¥t tá»­
+báº¥t Äá»
+báº¥y
+báº¥y cháº§y
+báº¥y chá»«
+báº¥y giá»
+báº¥y lÃ¢u
+báº¥y lÃ¢u nay
+báº¥y nay
+báº¥y nhiÃªu
+báº­p bÃ  báº­p bÃµm
+báº­p bÃµm
+báº¯t Äáº§u tá»«
+báº±ng
+báº±ng khÃ´ng
+báº±ng náº¥y
+báº±ng áº¥y
+bá»n
+bá»t
+bá»
+bá» máº¹
+bá»ng
+bá»ng chá»c
+bá»ng dÆ°ng
+bá»ng khÃ´ng
+bá»ng nhiÃªn
+bá»ng ÄÃ¢u
+bá»
+bá»i pháº§n
+bá»
+bá»i
+bá»i chÆ°ng
+bá»i nhÆ°ng
+bá»i tháº¿
+bá»i vÃ¬
+bá»i váº­y
+bá»©c
+cao
+cha
+cha cháº£
+chao Ã´i
+chiáº¿c
+cho
+cho nÃªn
+cho tá»i
+cho tá»i khi
+cho Äáº¿n
+cho Äáº¿n khi
+choa
+chu cha
+chui cha
+chung cá»¥c
+chung qui
+chung quy
+chung quy láº¡i
+chuyá»n
+chÃ nh cháº¡nh
+chÃ­ cháº¿t
+chÃ­nh
+chÃ­nh lÃ 
+chÃ­nh thá»
+chÃ¹n chÃ¹n
+chÃ¹n chÅ©n
+chÃº
+chÃº mÃ y
+chÃº mÃ¬nh
+chÃºng mÃ¬nh
+chÃºng ta
+chÃºng tÃ´i
+chÄn cháº¯n
+chÄng
+chÆ°a
+cháº§m cháº­p
+cháº­c
+cháº¯c
+cháº¯c háº³n
+cháº³ng láº½
+cháº³ng nhá»¯ng
+cháº³ng ná»¯a
+cháº³ng pháº£i
+cháº¿t ná»i
+cháº¿t tháº­t
+cháº¿t tiá»t
+chá»
+chá»n
+chá»c chá»c
+chá»
+chá» chi
+chá»£t
+chá»§n
+chá»©
+chá»© lá»
+coi bá»
+coi mÃ²i
+con
+cu cáº­u
+cuá»n
+cuá»c
+cÃ ng
+cÃ¡c
+cÃ¡i
+cÃ¢y
+cÃ²n
+cÃ³
+cÃ³ chÄng lÃ 
+cÃ³ dá»
+cÃ³ thá»
+cÃ³ váº»
+cÃ³c khÃ´
+cÃ´
+cÃ´ mÃ¬nh
+cÃ´ng nhiÃªn
+cÃ¹ng
+cÃ¹ng cá»±c
+cÃ¹ng nhau
+cÃ¹ng vá»i
+cÄn
+cÄn cáº¯t
+cÅ©ng
+cÅ©ng nhÆ°
+cÅ©ng váº­y
+cÅ©ng váº­y thÃ´i
+cÆ¡
+cÆ¡ chá»«ng
+cÆ¡ há»
+cÆ¡ mÃ 
+cÆ¡n
+cáº£
+cáº£ tháº£y
+cáº£ thá»
+cáº£m Æ¡n
+cáº§n
+cáº­t lá»±c
+cáº­t sá»©c
+cáº­u
+cá» lai
+cá»§a
+cá»©
+cá»© viá»c
+cá»±c lá»±c
+do
+do vÃ¬
+do váº­y
+do ÄÃ³
+duy
+dÃ o
+dÃ¬
+dÃ¹ cho
+dÃ¹ ráº±ng
+dÆ°á»i
+dáº¡
+dáº§n dÃ 
+dáº§n dáº§n
+dáº§u sao
+dáº«u
+dáº«u sao
+dá» sá»£
+dá» thÆ°á»ng
+dá» chá»«ng
+dá»¯
+em
+giá»¯a
+gÃ¬
+hay
+hoÃ n toÃ n
+hoáº·c
+hÆ¡n
+háº§u háº¿t
+há»
+há»i
+khi
+khÃ¡c
+khÃ´ng
+luÃ´n
+lÃ 
+lÃ m
+lÃªn
+lÃºc
+láº¡i
+láº§n
+lá»n
+muá»n
+mÃ 
+mÃ¬nh
+má»i
+má»t
+má»t cÃ¡ch
+má»i
+má»£
+ngay
+ngay cáº£
+ngay khi
+ngay lÃºc
+ngay láº­p tá»©c
+ngay tá»©c kháº¯c
+ngay tá»«
+nghe chá»«ng
+nghe ÄÃ¢u
+nghen
+nghiá»m nhiÃªn
+nghá»m
+ngoÃ i
+ngoÃ i ra
+ngoáº£i
+ngÃ y
+ngÃ y cÃ ng
+ngÃ y ngÃ y
+ngÃ y xÆ°a
+ngÃ y xá»­a
+ngÃ´i
+ngÃµ háº§u
+ngÄn ngáº¯t
+ngÆ°Æ¡i
+ngÆ°á»i
+ngá»n
+ngá»t
+ngá» nhá»¡
+nh
+nhau
+nhiÃªn háº­u
+nhiá»u
+nhiá»t liá»t
+nhung nhÄng
+nhÃ 
+nhÃ¢n dá»p
+nhÃ¢n tiá»n
+nhÃ©
+nhÃ³n nhÃ©n
+nhÆ°
+nhÆ° chÆ¡i
+nhÆ° khÃ´ng
+nhÆ° quáº£
+nhÆ° thá»
+nhÆ° tuá»ng
+nhÆ° váº­y
+nhÆ°ng
+nhÆ°ng mÃ 
+nhÆ°á»£c báº±ng
+nháº¥t
+nháº¥t loáº¡t
+nháº¥t luáº­t
+nháº¥t má»±c
+nháº¥t nháº¥t
+nháº¥t quyáº¿t
+nháº¥t sinh
+nháº¥t thiáº¿t
+nháº¥t tÃ¢m
+nháº¥t tá»
+nháº¥t ÄÃ¡n
+nháº¥t Äá»nh
+nháº­n
+nhá»
+nhá»¡ ra
+nhá»¯ng
+nhá»¯ng ai
+nhá»¯ng nhÆ°
+nÃ o
+nÃ y
+nÃªn
+nÃªn chi
+nÃ³
+nÃ³c
+nÃ³i
+nÄm
+nÆ¡i
+náº¥y
+náº¿u
+náº¿u nhÆ°
+ná»n
+ná»
+ná»
+ná»©c ná»
+ná»¯a
+oai oÃ¡i
+oÃ¡i
+pho
+phÃ¨
+phÃ³c
+phÃ³t
+phÄn pháº¯t
+phÆ°Æ¡ng chi
+pháº£i
+pháº£i chi
+pháº£i chÄng
+pháº¯t
+phá» phui
+phá»ng
+phá»ng nhÆ°
+phá»c
+phá»¥t
+phá»©t
+qua
+qua quÃ­t
+qua quÃ½t
+quyáº¿t
+quyáº¿t nhiÃªn
+quyá»n
+quÃ¡
+quÃ¡ chá»«ng
+quÃ¡ láº¯m
+quÃ¡ sÃ¡
+quÃ¡ thá»
+quÃ¡ trá»i
+quÃ¡ xÃ¡
+quÃ¡ Äá»i
+quÃ¡ Äá»
+quÃ¡ Æ°
+quÃ½ há»
+quáº£
+quáº£ lÃ 
+quáº£ tang
+quáº£ tháº­t
+quáº£ tÃ¬nh
+quáº£ váº­y
+quáº£ ÄÃºng
+ra
+ra pháº¿t
+ra sao
+ra trÃ²
+ren rÃ©n
+riu rÃ­u
+riÃªng
+riá»t
+rÃ y
+rÃ¡o
+rÃ¡o trá»i
+rÃ©n
+rÃ­ch
+rÃ³n rÃ©n
+rÃºt cá»¥c
+rÄng
+ráº¥t
+ráº±ng
+ráº±ng lÃ 
+rá»t cuá»c
+rá»t cá»¥c
+rá»i
+rá»©a
+sa sáº£
+sao
+sau
+sau chÃ³t
+sau cuá»i
+sau cÃ¹ng
+sau ÄÃ³
+so
+song le
+suÃ½t
+sÃ¬
+sáº¡ch
+sáº¥t
+sáº¯p
+sáº½
+sá»
+sá» lÃ 
+sá»t sá»t
+sá» dÄ©
+sá»±
+tanh
+tha há»
+than Ã´i
+thanh
+theo
+thi thoáº£ng
+thoáº¡t
+thoáº¡t nhiÃªn
+thoáº¯t
+thuáº§n
+thÃ 
+thÃ  lÃ 
+thÃ  ráº±ng
+thÃ nh ra
+thÃ nh thá»­
+thÃ¡i quÃ¡
+thÃ¡ng
+thÃ¬
+thÃ¬ thÃ´i
+thÃ¬nh lÃ¬nh
+thÃ­m
+thÃ´i
+thÃºng tháº¯ng
+thÆ°Æ¡ng Ã´i
+thÆ°á»ng
+tháº£o hÃ¨n
+tháº£o nÃ o
+tháº¥y
+tháº©y
+tháº­m
+tháº­m chÃ­
+tháº­t lá»±c
+tháº­t ra
+tháº­t váº­y
+tháº¿
+tháº¿ lÃ 
+tháº¿ mÃ 
+tháº¿ nÃ o
+tháº¿ nÃªn
+tháº¿ ra
+tháº¿ thÃ¬
+tháº¿ Ã 
+tháº¿ch
+thá»nh thoáº£ng
+thá»m
+thá»c
+thá»c thÃ¡o
+thá»t
+thá»t nhiÃªn
+thá»c
+thá»i gian
+thá»¥c máº¡ng
+thá»­a
+thá»±c ra
+thá»±c sá»±
+thá»±c váº­y
+tiáº¿p theo
+tiáº¿p ÄÃ³
+tiá»n thá»
+toÃ 
+toÃ© khÃ³i
+toáº¹t
+trong
+trÃªn
+trÆ°á»c
+trÆ°á»c kia
+trÆ°á»c nay
+trÆ°á»c tiÃªn
+trÆ°á»c ÄÃ¢y
+trÆ°á»c ÄÃ³
+tráº¿u trÃ¡o
+trá»n
+trá»t
+trá»u tráº¡o
+trá»ng
+trá»i Äáº¥t Æ¡i
+trá»« phi
+tuy
+tuy nhiÃªn
+tuy ráº±ng
+tuy tháº¿
+tuy váº­y
+tuyá»t nhiÃªn
+tuáº§n tá»±
+tuá»t luá»t
+tuá»t tuá»n tuá»t
+tuá»t tuá»t
+tÃ  tÃ 
+tÃªnh
+tÃ­t mÃ¹
+tÃ² te
+tÃ´i
+tÃ´ng tá»c
+tÃ¹ tÃ¬
+tÄm táº¯p
+táº¡i
+táº¡i vÃ¬
+táº¥m
+táº¥n
+táº¥t cáº£
+táº¥t tháº£y
+táº¥t táº§n táº­t
+táº¥t táº­t
+táº¯p
+táº¯p lá»±
+tá»t
+tá» ra
+tá» váº»
+tá»c táº£
+tá»i Æ°
+tá»t
+tá»
+tá»i
+tá»©c thÃ¬
+tá»©c tá»c
+tá»«
+tá»«ng
+tá»± vÃ¬
+tá»±u trung
+veo
+veo veo
+viá»c
+vung thiÃªn Äá»a
+vung tÃ n tÃ¡n
+vung tÃ¡n tÃ n
+vÃ 
+vÃ o
+vÃ¢ng
+vÃ¨o
+vÃ¬
+vÃ¬ chÆ°ng
+vÃ¬ tháº¿
+vÃ¬ váº­y
+vÃ­ báº±ng
+vÃ­ dÃ¹
+vÃ­ phá»ng
+vÃ­ thá»­
+vÃ´ hÃ¬nh trung
+vÃ´ ká»
+vÃ´ luáº­n
+vÃ´ vÃ n
+vÄng tÃª
+váº¡n nháº¥t
+váº£ chÄng
+váº£ láº¡i
+váº«n
+váº­y
+váº­y lÃ 
+váº­y thÃ¬
+vá»
+vá» táº¥t
+vá»n dÄ©
+vá»i
+vá»i láº¡i
+vá»
+vá»¥t
+vá»«a
+vá»«a má»i
+xa xáº£
+xiáº¿t bao
+xon xÃ³n
+xoÃ nh xoáº¡ch
+xoÃ©t
+xoáº³n
+xoáº¹t
+xuáº¥t kÃ¬ báº¥t Ã½
+xuáº¥t ká»³ báº¥t Ã½
+xuá»
+xuá»ng
+xÄm xÃºi
+xÄm xÄm
+xÄm xáº¯m
+xá»nh xá»ch
+xá»p
+Ã 
+Ã  Æ¡i
+Ã o
+Ã¡
+Ã¡ Ã 
+Ã¡i
+Ã¡i chÃ 
+Ã¡i dÃ 
+Ã¡ng
+Ã¢u lÃ 
+Ã´ hay
+Ã´ hÃ´
+Ã´ kÃª
+Ã´ kÃ¬a
+Ã´i chao
+Ã´i thÃ´i
+Ã´ng
+Ãºi
+Ãºi chÃ 
+Ãºi dÃ o
+Ã½
+Ã½ chá»«ng
+Ã½ da
+Äang
+Äi
+Äiá»u
+ÄÃ nh Äáº¡ch
+ÄÃ¡ng lÃ­
+ÄÃ¡ng lÃ½
+ÄÃ¡ng láº½
+ÄÃ¡nh ÄÃ¹ng
+ÄÃ¡o Äá»
+ÄÃ¢y
+ÄÃ£
+ÄÃ³
+ÄÆ°á»£c
+Äáº¡i loáº¡i
+Äáº¡i nhÃ¢n
+Äáº¡i phÃ m
+Äáº¡i Äá»
+Äáº¿n
+Äáº¿n ná»i
+Äá»u
+Äá»
+Æ¡
+Æ¡ hay
+Æ¡ kÃ¬a
+Æ¡i
+Æ°
+áº¡
+áº¡ Æ¡i
+áº¥y
+áº§u Æ¡
+áº¯t
+áº¯t háº³n
+áº¯t lÃ 
+á»i dÃ o
+á»i giá»i
+á»i giá»i Æ¡i
+á»
+á»ng
+á»
+á»
+á»
+á» trÃªn
+á»§a
+á»© há»±
+á»© á»«
+á»«
+á»­
\ No newline at end of file
diff --git a/static/stopwords/yo b/static/stopwords/yo
new file mode 100644
index 0000000..60572e9
--- /dev/null
+++ b/static/stopwords/yo
@@ -0,0 +1,60 @@
+a
+an
+bÃ¡
+bÃ­
+báº¹Ìráº¹Ì
+fÃºn
+fáº¹Ì
+gbogbo
+inÃº
+jÃ¹
+jáº¹
+jáº¹Ì
+kan
+kÃ¬
+kÃ­
+kÃ²
+lÃ¡ti
+lÃ¨
+lá»
+mi
+mo
+mÃ¡a
+má»Ì
+ni
+nÃ¡Ã 
+nÃ­
+nÃ­gbÃ 
+nÃ­torÃ­
+nÇ¹kan
+o
+padÃ 
+pÃ©
+pÃºpá»Ì
+páº¹ÌlÃº
+ráº¹Ì
+sÃ¬
+sÃ­
+sÃ­nÃº
+sÌ£
+ti
+tÃ­
+wÃ 
+wÃ¡
+wá»n
+wá»Ìn
+yÃ¬Ã­
+Ã ti
+Ã wá»n
+Ã©
+Ã­
+Ã²un
+Ã³
+Å
+ÅlÃ¡
+á¹£e
+á¹£Ã©
+á¹£Ã¹gbá»Ìn
+áº¹má»Ì
+á»já»Ì
+á»Ìpá»Ìlá»pá»Ì
\ No newline at end of file
diff --git a/static/stopwords/zh b/static/stopwords/zh
new file mode 100644
index 0000000..bc313f5
--- /dev/null
+++ b/static/stopwords/zh
@@ -0,0 +1,794 @@
+ã
+ã
+ã
+ã
+ã
+ã
+ä¸
+ä¸ä¸ª
+ä¸äº
+ä¸ä½
+ä¸å
+ä¸å
+ä¸æ¹é¢
+ä¸æ¦
+ä¸æ¥
+ä¸æ ·
+ä¸ç§
+ä¸è¬
+ä¸è½¬ç¼
+ä¸
+ä¸ä¸
+ä¸
+ä¸
+ä¸ä¸
+ä¸
+ä¸
+ä¸ä»
+ä¸ä½
+ä¸å
+ä¸å
+ä¸åª
+ä¸å¤ä¹
+ä¸å¦
+ä¸å¦¨
+ä¸å°½
+ä¸å°½ç¶
+ä¸å¾
+ä¸æ
+ä¸æ
+ä¸æ
+ä¸æ
+ä¸æ
+ä¸æ¯
+ä¸æ¯
+ä¸ç¶
+ä¸ç¹
+ä¸ç¬
+ä¸ç®¡
+ä¸è³äº
+ä¸è¥
+ä¸è®º
+ä¸è¿
+ä¸é®
+ä¸
+ä¸å¶
+ä¸å¶è¯´
+ä¸å¦
+ä¸æ­¤åæ¶
+ä¸
+ä¸ä¸è¯´
+ä¸è¯´
+ä¸¤è
+ä¸ª
+ä¸ªå«
+ä¸­
+ä¸´
+ä¸º
+ä¸ºäº
+ä¸ºä»ä¹
+ä¸ºä½
+ä¸ºæ­¢
+ä¸ºæ­¤
+ä¸ºç
+ä¹
+ä¹è³
+ä¹è³äº
+ä¹
+ä¹
+ä¹ä¸
+ä¹æä»¥
+ä¹ç±»
+ä¹ä¹
+ä¹
+ä¹
+ä¹
+ä¹
+ä¹å¥½
+ä¹ç½¢
+äº
+äº
+äºæ¥
+äº
+äºæ¯
+äºæ¯ä¹
+äºäº
+äºå°
+äº
+äº
+äº¦
+äºº
+äººä»¬
+äººå®¶
+ä»
+ä»ä¹
+ä»ä¹æ ·
+ä»
+ä»äº
+ä»
+ä»æ§
+ä»
+ä»æ­¤
+ä»è
+ä»
+ä»äºº
+ä»ä»¬
+ä»ä»¬ä»¬
+ä»¥
+ä»¥ä¸
+ä»¥ä¸º
+ä»¥ä¾¿
+ä»¥å
+ä»¥å
+ä»¥æ
+ä»¥æ
+ä»¥æ¥
+ä»¥è³
+ä»¥è³äº
+ä»¥è´
+ä»¬
+ä»»
+ä»»ä½
+ä»»å­
+ä¼
+ä¼¼ç
+ä½
+ä½å¡
+ä½æ¯
+ä½
+ä½ä»¥
+ä½åµ
+ä½å¤
+ä½æ¶
+ä½å¤
+ä½ä¸º
+ä½ 
+ä½ ä»¬
+ä½¿
+ä½¿å¾
+ä¾å¦
+ä¾
+ä¾æ®
+ä¾ç§
+ä¾¿äº
+ä¿º
+ä¿ºä»¬
+å
+åä½¿
+åæ
+åç¶
+åè¥
+å
+åå¥ç¶
+åä½¿
+åå¦
+åè¥
+å
+å
+å¿
+åä¸å
+å
+åæ¯
+å¨ä½
+å¨é¨
+å«
+å­
+å®
+å±
+å³äº
+å³äºå·ä½å°è¯´
+å¶
+å¶ä¸
+å¶ä¸­
+å¶äº
+å¶ä»
+å¶ä½
+å¶å®
+å¶æ¬¡
+å·ä½å°è¯´
+å·ä½è¯´æ¥
+å¼ä¹
+å
+å
+åå¶æ¬¡
+åå
+åæ
+åè
+åèè¯´
+åè¯´
+å
+å²
+åµä¸
+å 
+å æ¶
+å¡
+å¡æ¯
+å­
+å­å
+åºäº
+åºæ¥
+å
+åå«
+å
+åç
+å«
+å«äºº
+å«å¤
+å«æ¯
+å«ç
+å«ç®¡
+å«è¯´
+å°
+åå
+åæ­¤
+åè
+å ä¹
+å ä»¥
+åº
+å³
+å³ä»¤
+å³ä½¿
+å³ä¾¿
+å³å¦
+å³æ
+å³è¥
+å´
+å»
+å
+åå
+å
+åå¶
+åè³
+åä¹
+åè
+åè¿æ¥
+åè¿æ¥è¯´
+åå°
+å¦
+å¦ä¸æ¹é¢
+å¦å¤
+å¦æ
+åª
+åªå½
+åªæ
+åªæ¯
+åªæ
+åªæ¶
+åªè¦
+åªé
+å«
+å®å
+å¯
+å¯ä»¥
+å¯æ¯
+å¯è§
+å
+åä¸ª
+åä½
+åç§
+åèª
+å
+åæ¶
+å
+åè
+å
+åä½¿
+åç
+å
+å
+å¦å
+å§
+å§å
+å«
+å±
+å
+å
+å
+å
+å
+åå¼
+å¢
+åµ
+åµåµ
+å¸
+å¼å§
+å
+å
+å
+å¦
+å§
+å±
+å±ä»¬
+å³
+å
+å
+åå
+å
+å
+åå
+åå
+å
+å
+å¦
+å©
+åª
+åªä¸ª
+åªäº
+åªå¿
+åªå¤©
+åªå¹´
+åªæ
+åªæ ·
+åªè¾¹
+åªé
+å¼
+å¼å·
+å
+å¯æ
+å
+å
+å¥
+å¦
+åªè¾¾
+å·å½
+å
+å
+åå·
+å½
+å¡
+å¡å¡
+å¬
+å¯
+å³
+å
+åç»
+å
+å
+å»
+å¿
+å¿å¿
+å
+å 
+å ä¸º
+å äº
+å æ­¤
+å ç
+å è
+åºç¶
+å¨
+å¨ä¸
+å¨äº
+å°
+åºäº
+å¤å¨
+å¤
+å¤ä¹
+å¤å°
+å¤§
+å¤§å®¶
+å¥¹
+å¥¹ä»¬
+å¥½
+å¦
+å¦ä¸
+å¦ä¸æè¿°
+å¦ä¸
+å¦ä½
+å¦å¶
+å¦å
+å¦æ¯
+å¦æ
+å¦æ­¤
+å¦è¥
+å§è
+å­°æ
+å­°ç¥
+å®
+å®å¯
+å®æ¿
+å®è¯
+å®
+å®ä»¬
+å¯¹
+å¯¹äº
+å¯¹å¾
+å¯¹æ¹
+å¯¹æ¯
+å°
+å°
+å°
+å°å
+å°å°
+å°ä¸
+å°±
+å°±æ¯
+å°±æ¯äº
+å°±æ¯è¯´
+å°±ç®
+å°±è¦
+å°½
+å°½ç®¡
+å°½ç®¡å¦æ­¤
+å²ä½
+å·±
+å·²
+å·²ç£
+å·´
+å·´å·´
+å¹´
+å¹¶
+å¹¶ä¸
+åº¶ä¹
+åº¶å 
+å¼å¤
+å¼å§
+å½
+å½é½
+å½
+å½å°
+å½ç¶
+å½ç
+å½¼
+å½¼æ¶
+å½¼æ­¤
+å¾
+å¾
+å¾
+å¾
+å¾äº
+æ
+æä¹
+æä¹å
+æä¹æ ·
+æå¥
+ææ ·
+æ»ä¹
+æ»çæ¥ç
+æ»çæ¥è¯´
+æ»çè¯´æ¥
+æ»èè¨ä¹
+æ°æ°ç¸å
+æ¨
+æå¶
+æ¢è¯´
+æ
+æä»¬
+æ
+æå
+ææ¯
+ææ°
+æè
+æªè³
+æ
+æä»¥
+æå¨
+æå¹¸
+ææ
+æ
+æè½
+æ
+æä»
+æ
+ææ
+æ¿
+æ
+æç§
+æ¢å¥è¯è¯´
+æ¢è¨ä¹
+æ®
+æ®æ­¤
+æ¥ç
+æ
+ææ­¤
+æè
+æäºº
+æ 
+æ å®
+æ è®º
+æ¢
+æ¢å¾
+æ¢æ¯
+æ¢ç¶
+æ¥
+æ¶
+æ¶å
+æ¯
+æ¯ä»¥
+æ¯ç
+æ´
+æ¾
+æ¿
+æ¿ä»£
+æ
+æ
+æ
+æäº
+æå³
+æå
+ææ¶
+æç
+æ
+æ
+æç
+æ¬
+æ¬äºº
+æ¬å°
+æ¬ç
+æ¬èº«
+æ¥
+æ¥ç
+æ¥èª
+æ¥è¯´
+æäº
+æç¶
+æç
+æ
+æä¸ª
+æäº
+ææ
+æ ¹æ®
+æ¬¤
+æ­£å¼
+æ­£å¦
+æ­£å·§
+æ­£æ¯
+æ­¤
+æ­¤å°
+æ­¤å¤
+æ­¤å¤
+æ­¤æ¶
+æ­¤æ¬¡
+æ­¤é´
+æ¯å®
+æ¯
+æ¯å½
+æ¯
+æ¯å
+æ¯å¦
+æ¯æ¹
+æ²¡å¥ä½
+æ²¿
+æ²¿ç
+æ¼«è¯´
+ç¹
+ç
+ç¶å
+ç¶å
+ç¶è
+ç§
+ç§ç
+ç¹ä¸
+ç¹èª
+çä¸
+çä¹
+çæ
+çè
+çè³
+çè³äº
+ç¨
+ç¨æ¥
+ç±
+ç±äº
+ç±æ¯
+ç±æ­¤
+ç±æ­¤å¯è§
+ç
+çç¡®
+çè¯
+ç´å°
+ç¸å¯¹èè¨
+çå¾
+ç
+ç¨ç¼
+ç
+çå¢
+ç£
+ç£ä¹
+ç£å
+ç¦»
+ç§
+ç§°
+ç«è
+ç¬¬
+ç­
+ç­å°
+ç­ç­
+ç®è¨ä¹
+ç®¡
+ç±»å¦
+ç´§æ¥ç
+çºµ
+çºµä»¤
+çºµä½¿
+çºµç¶
+ç»
+ç»è¿
+ç»æ
+ç»
+ç»§ä¹
+ç»§å
+ç»§è
+ç»¼ä¸æè¿°
+ç½¢äº
+è
+è
+èä¸
+èåµ
+èå
+èå¤
+èå·²
+èæ¯
+èè¨
+è½
+è½å¦
+è¾
+èª
+èªä¸ªå¿
+èªä»
+èªåå¿
+èªå
+èªå®¶
+èªå·±
+èªæ
+èªèº«
+è³
+è³äº
+è³ä»
+è³è¥
+è´
+è¬ç
+è¥
+è¥å¤«
+è¥æ¯
+è¥æ
+è¥é
+è«ä¸ç¶
+è«å¦
+è«è¥
+è½
+è½å
+è½ç¶
+è½è¯´
+è¢«
+è¦
+è¦ä¸
+è¦ä¸æ¯
+è¦ä¸ç¶
+è¦ä¹
+è¦æ¯
+è­¬å»
+è­¬å¦
+è®©
+è®¸å¤
+è®º
+è®¾ä½¿
+è®¾æ
+è®¾è¥
+è¯å¦
+è¯ç¶
+è¯¥
+è¯´
+è¯´æ¥
+è¯·
+è¯¸
+è¯¸ä½
+è¯¸å¦
+è°
+è°äºº
+è°æ
+è°ç¥
+è´¼æ­»
+èµä»¥
+èµ¶
+èµ·
+èµ·è§
+è¶
+è¶ç
+è¶æ¯
+è·
+è·
+è¾
+è¾ä¹
+è¾¹
+è¿
+è¿
+è¿æ¯
+è¿æ
+è¿è¦
+è¿
+è¿ä¸æ¥
+è¿ä¸ª
+è¿ä¹
+è¿ä¹äº
+è¿ä¹æ ·
+è¿ä¹ç¹å¿
+è¿äº
+è¿ä¼å¿
+è¿å¿
+è¿å°±æ¯è¯´
+è¿æ¶
+è¿æ ·
+è¿æ¬¡
+è¿è¬
+è¿è¾¹
+è¿é
+è¿è
+è¿
+è¿å
+éæ­¥
+éè¿
+éµå¾ª
+éµç§
+é£
+é£ä¸ª
+é£ä¹
+é£ä¹äº
+é£ä¹æ ·
+é£äº
+é£ä¼å¿
+é£å¿
+é£æ¶
+é£æ ·
+é£è¬
+é£è¾¹
+é£é
+é½
+éäºº
+é´äº
+éå¯¹
+é¿
+é¤
+é¤äº
+é¤å¤
+é¤å¼
+é¤æ­¤ä¹å¤
+é¤é
+é
+éå
+éæ¶
+éç
+é¾éè¯´
+é¶
+é
+éä½
+éå¾
+éç¹
+éç¬
+é 
+é¡º
+é¡ºç
+é¦å
+ï¸¿
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼
+ï¼ 
+ï¼»
+ï¼½
+ï½
+ï½
+ï½
+ï½
+ï¿¥
\ No newline at end of file
diff --git a/static/stopwords/zu b/static/stopwords/zu
new file mode 100644
index 0000000..36c570c
--- /dev/null
+++ b/static/stopwords/zu
@@ -0,0 +1,29 @@
+futhi
+kahle
+kakhulu
+kanye
+khona
+kodwa
+kungani
+kusho
+la
+lakhe
+lapho
+mina
+ngesikhathi
+nje
+phansi
+phezulu
+u
+ukuba
+ukuthi
+ukuze
+uma
+wahamba
+wakhe
+wami
+wase
+wathi
+yakhe
+zakhe
+zonke
\ No newline at end of file
diff --git a/subprojects/gtest.wrap b/subprojects/gtest.wrap
new file mode 100644
index 0000000..ba9c9b9
--- /dev/null
+++ b/subprojects/gtest.wrap
@@ -0,0 +1,10 @@
+[wrap-file]
+directory = googletest-release-1.8.1
+
+source_url = https://github.com/google/googletest/archive/release-1.8.1.zip
+source_filename = gtest-1.8.1.zip
+source_hash = 927827c183d01734cc5cfef85e0ff3f5a92ffe6188e0d18e909c5efebf28a0c7
+
+patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.1/1/get_zip
+patch_filename = gtest-1.8.1-1-wrap.zip
+patch_hash = f79f5fd46e09507b3f2e09a51ea6eb20020effe543335f5aee59f30cc8d15805
diff --git a/subprojects/liblzma.wrap b/subprojects/liblzma.wrap
new file mode 100644
index 0000000..af94ffc
--- /dev/null
+++ b/subprojects/liblzma.wrap
@@ -0,0 +1,13 @@
+[wrap-file]
+directory = xz-5.2.1
+
+source_url = http://tukaani.org/xz/xz-5.2.1.tar.xz
+source_filename = xz-5.2.1.tar.xz
+source_hash = 6ecdd4d80b12001497df0741d6037f918d270fa0f9a1ab4e2664bf4157ae323c
+
+patch_url = https://mirror.download.kiwix.org/dev/xz-5.2.1-wrap.zip
+patch_filename = xz-5.2.1-wrap.zip
+patch_hash = 782a4e56bcc26ebda18041a05f2f85dce70284109a5ce99ea960c6b4432a99e9
+
+[provide]
+liblzma = lzma_dep
diff --git a/subprojects/zstd.wrap b/subprojects/zstd.wrap
new file mode 100644
index 0000000..e3dd1e9
--- /dev/null
+++ b/subprojects/zstd.wrap
@@ -0,0 +1,13 @@
+[wrap-file]
+directory = zstd-1.4.5
+source_url = https://github.com/facebook/zstd/releases/download/v1.4.5/zstd-1.4.5.tar.gz
+source_filename = zstd-1.4.5.tar.gz
+source_hash = 98e91c7c6bf162bf90e4e70fdbc41a8188b9fa8de5ad840c401198014406ce9e
+
+patch_url = https://mirror.download.kiwix.org/dev/zstd-1.4.5-wrap.zip
+patch_filename = zstd-1.4.5-wrap.zip
+patch_hash = 4462693b58939b61ab76c5e5597343ab156eb0681b60a77908d2b88e17dca7cc
+
+[provide]
+libzstd = libzstd_dep
+
diff --git a/test/archive.cpp b/test/archive.cpp
new file mode 100644
index 0000000..0a7123f
--- /dev/null
+++ b/test/archive.cpp
@@ -0,0 +1,697 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/zim.h>
+#include <zim/archive.h>
+#include <zim/item.h>
+#include <zim/search.h>
+#include <zim/suggestion.h>
+#include <zim/error.h>
+
+#include <zim/writer/creator.h>
+
+#include "tools.h"
+#include "../src/fs.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using zim::unittests::makeTempFile;
+using zim::unittests::getDataFilePath;
+using zim::unittests::TempFile;
+using zim::unittests::TestItem;
+using zim::unittests::IsFrontArticle;
+
+using TestContextImpl = std::vector<std::pair<std::string, std::string> >;
+struct TestContext : TestContextImpl {
+  TestContext(const std::initializer_list<value_type>& il)
+    : TestContextImpl(il)
+  {}
+};
+
+std::ostream& operator<<(std::ostream& out, const TestContext& ctx)
+{
+  out << "Test context:\n";
+  for ( const auto& kv : ctx )
+    out << "\t" << kv.first << ": " << kv.second << "\n";
+  out << std::endl;
+  return out;
+}
+
+std::string // Raw bytes of an empty ZIM file: 80-byte header, 1-byte mimelist, 16-byte md5sum.
+emptyZimArchiveContent()
+{
+  std::string content;
+  content += "ZIM\x04"; // Magic (bytes 0..3)
+  content += "\x05" + std::string(3, '\0'); // Version (bytes 4..7)
+  content += std::string(16, '\0'); // uuid (bytes 8..23)
+  content += std::string(4, '\0'); // article count (bytes 24..27)
+  content += std::string(4, '\0'); // cluster count (bytes 28..31)
+  content += "\x51" + std::string(7, '\0'); // url ptr pos = 81 (bytes 32..39)
+  content += "\x51" + std::string(7, '\0'); // title ptr pos = 81 (bytes 40..47)
+  content += "\x51" + std::string(7, '\0'); // cluster ptr pos = 81 (bytes 48..55)
+  content += "\x50" + std::string(7, '\0'); // mimelist ptr pos = 80 (bytes 56..63)
+  content += std::string(4, '\0'); // main page index (bytes 64..67)
+  content += std::string(4, '\0'); // layout page index (bytes 68..71)
+  content += "\x51" + std::string(7, '\0'); // checksum pos = 81 (bytes 72..79)
+  content += std::string(1, '\0');; // (empty) mimelist (byte 80)
+  content += "\x9f\x3e\xcd\x95\x46\xf6\xc5\x3b\x35\xb4\xc6\xd4\xc0\x8e\xd0\x66"; // md5sum (bytes 81..96)
+  return content;
+}
+
+TEST(ZimArchive, openingAnInvalidZimArchiveFails) // Junk files, with or without the ZIM magic, must be rejected.
+{
+  const char* const prefixes[] = { "ZIM\x04", "" };
+  const unsigned char bytes[] = {0x00, 0x01, 0x11, 0x30, 0xFF};
+  for ( const std::string prefix : prefixes ) {
+    for ( const unsigned char byte : bytes ) {
+      for ( int count = 0; count < 100; count += 10 ) { // 0, 10, ..., 90 filler bytes
+        const TestContext ctx{
+                {"prefix",  prefix.size() ? "yes" : "no" },
+                {"byte", zim::unittests::to_string(byte) },
+                {"count", zim::unittests::to_string(count) }
+        };
+        const std::string zimfileContent = prefix + std::string(count, byte); // optional magic + `count` copies of `byte`
+        const auto tmpfile = makeTempFile("invalid_zim_file", zimfileContent);
+
+        EXPECT_THROW( zim::Archive(tmpfile->path()), std::runtime_error ) << ctx; // ctx identifies the failing combination
+      }
+    }
+  }
+}
+
+TEST(ZimArchive, openingAnEmptyZimArchiveSucceeds)
+{
+  const auto tmpfile = makeTempFile("empty_zim_file", emptyZimArchiveContent());
+
+  zim::Archive archive(tmpfile->path());
+  ASSERT_TRUE(archive.check());
+}
+
+bool isNastyOffset(int offset) { // false for header bytes the nastyEmptyZimArchive test leaves uncorrupted
+  if ( 6 <= offset && offset < 24 ) // Minor version or uuid
+    return false;
+
+  if ( 64 <= offset && offset < 72 ) // main page or layout page index
+    return false;
+
+  return true; // every other offset is corrupted by the test and must make opening throw
+}
+
+TEST(ZimArchive, nastyEmptyZimArchive) // Corrupting any "nasty" header byte must make opening throw.
+{
+  const std::string correctContent = emptyZimArchiveContent();
+  for ( int offset = 0; offset < 80; ++offset ) { // offsets 0..79, i.e. everything before the mimelist byte
+    if ( isNastyOffset(offset) ) {
+      const TestContext ctx{ {"offset", zim::unittests::to_string(offset) } };
+      std::string nastyContent(correctContent);
+      nastyContent[offset] = '\xff'; // corrupt exactly one byte per iteration
+      const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", nastyContent);
+      EXPECT_THROW( zim::Archive(tmpfile->path()), std::runtime_error ) << ctx;
+    }
+  }
+}
+
+TEST(ZimArchive, wrongChecksumInEmptyZimArchive) // A bad md5sum must be reported by check(), not by the constructor.
+{
+  std::string zimfileContent = emptyZimArchiveContent();
+  zimfileContent[85] = '\xff'; // byte 85 lies inside the trailing md5sum (bytes 81..96)
+  const auto tmpfile = makeTempFile("wrong_checksum_empty_zim_file", zimfileContent);
+
+  zim::Archive archive(tmpfile->path()); // constructed outside any EXPECT_THROW: opening is expected to succeed
+  ASSERT_FALSE(archive.check());
+}
+
+
+TEST(ZimArchive, openCreatedArchive)
+{
+  TempFile temp("zimfile");
+  auto tempPath = temp.path();
+  zim::Uuid uuid;
+  // Force special chars into the uuid to be sure they get no special handling.
+  uuid.data[5] = '\n';
+  uuid.data[10] = '\0';
+
+  zim::writer::Creator creator;
+  creator.setUuid(uuid);
+  creator.configIndexing(true, "eng");
+  creator.startZimCreation(tempPath);
+  auto item = std::make_shared<TestItem>("foo", "text/html", "Foo", "FooContent", IsFrontArticle::YES);
+  creator.addItem(item);
+  // Be sure that the title order is not the same as the url order
+  item = std::make_shared<TestItem>("foo2", "text/html", "AFoo", "Foo2Content", IsFrontArticle::NO);
+  creator.addItem(item);
+  creator.addMetadata("Title", "This is a title");
+  creator.addIllustration(48, "PNGBinaryContent48");
+  creator.addIllustration(96, "PNGBinaryContent96");
+  creator.setMainPath("foo");
+  creator.addRedirection("foo3", "FooRedirection", "foo"); // Not a front article.
+  creator.addRedirection("foo4", "FooRedirection", "NoExistant"); // Invalid redirection, must be removed by creator
+  creator.finishZimCreation();
+
+  zim::Archive archive(tempPath);
+#if !defined(ENABLE_XAPIAN)
+// 2*listingIndex + M/Counter + M/Title + mainpage + 2*Illustration + 2*Item + redirection
+#define ALL_ENTRY_COUNT 10
+#else
+// same as above + 2 xapian indexes.
+#define ALL_ENTRY_COUNT 12
+#endif
+  ASSERT_EQ(archive.getAllEntryCount(), ALL_ENTRY_COUNT);
+#undef ALL_ENTRY_COUNT
+  ASSERT_EQ(archive.getEntryCount(), 3);
+  ASSERT_EQ(archive.getArticleCount(), 1);
+  ASSERT_EQ(archive.getUuid(), uuid);
+  ASSERT_EQ(archive.getMetadataKeys(), std::vector<std::string>({"Counter", "Illustration_48x48@1", "Illustration_96x96@1", "Title"}));
+  ASSERT_EQ(archive.getIllustrationSizes(), std::set<unsigned int>({48, 96}));
+  ASSERT_TRUE(archive.hasMainEntry());
+
+  ASSERT_EQ(archive.getMetadata("Title"), "This is a title");
+  auto titleMeta = archive.getMetadataItem("Title");
+  ASSERT_EQ(std::string(titleMeta.getData()), "This is a title");
+  ASSERT_EQ(titleMeta.getMimetype(), "text/plain;charset=utf-8");
+  ASSERT_EQ(archive.getMetadata("Counter"), "text/html=2");
+  auto illu48 = archive.getIllustrationItem(48);
+  ASSERT_EQ(illu48.getPath(), "Illustration_48x48@1");
+  ASSERT_EQ(std::string(illu48.getData()), "PNGBinaryContent48");
+  auto illu48Meta = archive.getMetadataItem(illu48.getPath());
+  ASSERT_EQ(std::string(illu48Meta.getData()), "PNGBinaryContent48");
+  ASSERT_EQ(illu48Meta.getMimetype(), "image/png");
+  auto illu96 = archive.getIllustrationItem(96);
+  ASSERT_EQ(illu96.getPath(), "Illustration_96x96@1");
+  ASSERT_EQ(std::string(illu96.getData()), "PNGBinaryContent96");
+
+  auto foo = archive.getEntryByPath("foo");
+  ASSERT_EQ(foo.getPath(), "foo");
+  ASSERT_EQ(foo.getTitle(), "Foo");
+  ASSERT_EQ(std::string(foo.getItem().getData()), "FooContent");
+  ASSERT_THROW(foo.getRedirectEntry(), zim::InvalidType);
+  ASSERT_THROW(foo.getRedirectEntryIndex(), zim::InvalidType);
+
+  auto foo2 = archive.getEntryByPath("foo2");
+  ASSERT_EQ(foo2.getPath(), "foo2");
+  ASSERT_EQ(foo2.getTitle(), "AFoo");
+  ASSERT_EQ(std::string(foo2.getItem().getData()), "Foo2Content");
+
+  auto foo3 = archive.getEntryByPath("foo3");
+  ASSERT_EQ(foo3.getPath(), "foo3");
+  ASSERT_EQ(foo3.getTitle(), "FooRedirection");
+  ASSERT_TRUE(foo3.isRedirect());
+  ASSERT_EQ(foo3.getRedirectEntry().getIndex(), foo.getIndex());
+  ASSERT_EQ(foo3.getRedirectEntryIndex(), foo.getIndex());
+
+  auto main = archive.getMainEntry();
+  ASSERT_TRUE(main.isRedirect());
+  ASSERT_EQ(main.getRedirectEntry().getIndex(), foo.getIndex());
+  ASSERT_EQ(main.getRedirectEntryIndex(), foo.getIndex());
+  ASSERT_EQ(archive.getMainEntryIndex(), main.getIndex());
+}
+
+#if WITH_TEST_DATA
+TEST(ZimArchive, openRealZimArchive)
+{
+  const char* const zimfiles[] = {
+    "small.zim",
+    "wikibooks_be_all_nopic_2017-02.zim",
+    "wikibooks_be_all_nopic_2017-02_splitted.zim",
+    "wikipedia_en_climate_change_nopic_2020-01.zim"
+  };
+
+  for ( const std::string fname : zimfiles ) {
+    for (auto& testfile: getDataFilePath(fname)) {
+      const TestContext ctx{ {"path", testfile.path } };
+      std::unique_ptr<zim::Archive> archive;
+      EXPECT_NO_THROW( archive.reset(new zim::Archive(testfile.path)) ) << ctx;
+      if ( archive ) {
+        EXPECT_TRUE( archive->check() ) << ctx;
+      }
+    }
+  }
+}
+
+TEST(ZimArchive, randomEntry) // A random entry, once redirects are followed, must be an HTML item.
+{
+  const char* const zimfiles[] = {
+    "wikibooks_be_all_nopic_2017-02.zim",
+    "wikibooks_be_all_nopic_2017-02_splitted.zim",
+    "wikipedia_en_climate_change_nopic_2020-01.zim"
+  };
+
+  for ( const std::string fname : zimfiles ) {
+    for (auto& testfile: getDataFilePath(fname)) {
+      const TestContext ctx{ {"path", testfile.path } };
+      const zim::Archive archive(testfile.path);
+      try {
+        auto randomEntry = archive.getRandomEntry();
+        const auto item = randomEntry.getItem(true); // true: follow redirects
+        ASSERT_TRUE(item.getMimetype().find("text/html") != std::string::npos) << ctx;
+      } catch (const zim::EntryNotFound&) { // unnamed: the exception object itself is not used
+        FAIL() << "Impossible to find a random Entry in " << fname << ".\n"
+               << "This may occur even if this is not a bug (random will be random).\n"
+               << "Please re-run the tests.";
+      }
+    }
+  }
+}
+
+TEST(ZimArchive, illustration) // Each test file must expose exactly one illustration, of size 48.
+{
+  const char* const zimfiles[] = {
+    "small.zim",
+    "wikibooks_be_all_nopic_2017-02.zim"
+  };
+
+  for ( const std::string fname : zimfiles ) {
+    for (auto& testfile: getDataFilePath(fname)) {
+      const TestContext ctx{ {"path", testfile.path } };
+      const zim::Archive archive(testfile.path);
+      ASSERT_TRUE(archive.hasIllustration(48)) << ctx;
+      auto illustrationItem = archive.getIllustrationItem(48);
+      if(testfile.category == "nons") { // the expected illustration path depends on the test-file category
+        ASSERT_EQ(illustrationItem.getPath(), "Illustration_48x48@1") << ctx;
+      } else {
+        ASSERT_EQ(illustrationItem.getPath(), "I/favicon.png") << ctx;
+      }
+      ASSERT_EQ(archive.getIllustrationSizes(), std::set<unsigned int>({48})) << ctx; // ctx names the failing file
+    }
+  }
+}
+
+struct ZimFileInfo {
+  zim::entry_index_type articleCount, entryCount, allEntryCount;
+};
+
+struct TestDataInfo {
+  const char* const name;
+  ZimFileInfo withnsInfo, nonsInfo;
+
+
+  const ZimFileInfo& getZimFileInfo(const std::string& category) const {
+    if (category == "nons") {
+      return nonsInfo;
+    } else if (category == "withns") {
+      return withnsInfo;
+    }
+    throw std::runtime_error("Unknown category");
+  }
+};
+
+TEST(ZimArchive, articleNumber)
+{
+  TestDataInfo zimfiles[] = {
+     // Name                                           withns                               nons
+     //                                               {articles, userEntries, allEntries}, {articles, userEntries, allEntries}
+    {"small.zim",                                     { 1,       17,          17 },        { 1,       2,           16        }},
+    {"wikibooks_be_all_nopic_2017-02.zim",            { 70,      118,         118},        { 66,      109,         123       }},
+    {"wikibooks_be_all_nopic_2017-02_splitted.zim",   { 70,      118,         118},        { 66,      109,         123       }},
+    {"wikipedia_en_climate_change_nopic_2020-01.zim", { 7253,    7646,        7646},       { 1837,    7633,        7649      }}
+  };
+  // "withns" zim files have no notion of user entries, so entryCount == allEntryCount.
+  // For small.zim there is always 1 article, whether it is in the 'A' namespace or in a specific index.
+
+  for ( const auto& testdata : zimfiles ) {
+    for (auto& testfile: getDataFilePath(testdata.name)) {
+      const TestContext ctx{ {"path", testfile.path } };
+      const auto& testZimInfo = testdata.getZimFileInfo(testfile.category);
+      const zim::Archive archive(testfile.path);
+      EXPECT_EQ( archive.getAllEntryCount(), testZimInfo.allEntryCount ) << ctx;
+      EXPECT_EQ( archive.getEntryCount(), testZimInfo.entryCount ) << ctx;
+      EXPECT_EQ( archive.getArticleCount(), testZimInfo.articleCount ) << ctx;
+    }
+  }
+}
+#endif
+
+class CapturedStderr // While an instance is alive, text written to std::cerr is collected in `buffer`.
+{
+  std::ostringstream buffer; // receives everything sent to std::cerr
+  std::streambuf* const sbuf; // std::cerr's original stream buffer, restored by the destructor
+public:
+  CapturedStderr()
+    : sbuf(std::cerr.rdbuf())
+  {
+    std::cerr.rdbuf(buffer.rdbuf()); // redirect std::cerr into `buffer`
+  }
+
+  CapturedStderr(const CapturedStderr&) = delete; // a copy would restore the original buffer twice
+
+  ~CapturedStderr()
+  {
+    std::cerr.rdbuf(sbuf); // undo the redirection
+  }
+
+  operator std::string() const { return buffer.str(); } // the text captured so far
+};
+
+#define EXPECT_BROKEN_ZIMFILE(ZIMPATH, EXPECTED_STDERROR_TEXT) \
+  CapturedStderr stderror;                                     \
+  EXPECT_FALSE(zim::validate(ZIMPATH, checksToRun));           \
+  EXPECT_EQ(EXPECTED_STDERROR_TEXT, std::string(stderror)) << ZIMPATH;
+
+#define TEST_BROKEN_ZIM_NAME(ZIMNAME, EXPECTED)                \
+for(auto& testfile: getDataFilePath(ZIMNAME)) {EXPECT_BROKEN_ZIMFILE(testfile.path, EXPECTED)}
+
+#if WITH_TEST_DATA
+TEST(ZimArchive, validate)
+{
+  zim::IntegrityCheckList all;
+  all.set();
+
+  for(auto& testfile: getDataFilePath("small.zim")) {
+    ASSERT_TRUE(zim::validate(testfile.path, all));
+  }
+
+  zim::IntegrityCheckList checksToRun;
+  checksToRun.set();
+  checksToRun.reset(size_t(zim::IntegrityCheck::CHECKSUM));
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.smaller_than_header.zim",
+    "zim-file is too small to contain a header\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_urlptrpos.zim",
+    "Dirent pointer table outside (or not fully inside) ZIM file.\n"
+  );
+
+  for(auto& testfile: getDataFilePath("invalid.outofbounds_titleptrpos.zim")) {
+    std::string expected;
+    if (testfile.category == "withns") {
+      expected = "Title index table outside (or not fully inside) ZIM file.\n";
+    } else {
+      expected = "Full Title index table outside (or not fully inside) ZIM file.\n";
+    }
+    EXPECT_BROKEN_ZIMFILE(testfile.path, expected)
+  }
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_clusterptrpos.zim",
+    "Cluster pointer table outside (or not fully inside) ZIM file.\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.invalid_mimelistpos.zim",
+    "mimelistPos must be 80.\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.invalid_checksumpos.zim",
+    "Checksum position is not valid\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_first_direntptr.zim",
+    "Invalid dirent pointer\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_last_direntptr.zim",
+    "Invalid dirent pointer\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_first_title_entry.zim",
+    "Invalid title index entry.\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_last_title_entry.zim",
+    "Invalid title index entry.\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.outofbounds_first_clusterptr.zim",
+    "Invalid cluster pointer\n"
+  );
+
+
+  for(auto& testfile: getDataFilePath("invalid.nonsorted_dirent_table.zim")) {
+    std::string expected;
+    if (testfile.category == "withns") {
+      expected = "Dirent table is not properly sorted:\n"
+                 "  #0: A/main.html\n"
+                 "  #1: -/favicon\n";
+    } else {
+      expected = "Dirent table is not properly sorted:\n"
+                 "  #0: C/main.html\n"
+                 "  #1: C/favicon.png\n";
+    }
+    EXPECT_BROKEN_ZIMFILE(testfile.path, expected)
+  }
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.nonsorted_title_index.zim",
+    "Title index is not properly sorted.\n"
+  );
+
+  TEST_BROKEN_ZIM_NAME(
+    "invalid.bad_mimetype_list.zim",
+    "Error getting mimelists.\n"
+  );
+
+  for(auto& testfile: getDataFilePath("invalid.bad_mimetype_in_dirent.zim")) {
+    std::string expected;
+    if (testfile.category == "withns") {
+      expected = "Entry M/Language has invalid MIME-type value 1234.\n";
+    } else {
+      expected = "Entry M/Publisher has invalid MIME-type value 1234.\n";
+    }
+    EXPECT_BROKEN_ZIMFILE(testfile.path, expected)
+  }
+}
+#endif
+
+void checkEquivalence(const zim::Archive& archive1, const zim::Archive& archive2) // Asserts both archives expose the same content.
+{
+  EXPECT_EQ(archive1.getFilesize(), archive2.getFilesize());
+  EXPECT_EQ(archive1.getClusterCount(), archive2.getClusterCount());
+
+  ASSERT_EQ(archive1.getEntryCount(), archive2.getEntryCount());
+  const zim::Entry mainEntry = archive1.getMainEntry();
+  ASSERT_EQ(mainEntry.getTitle(), archive2.getMainEntry().getTitle());
+
+  ASSERT_NE(0, archive1.getEntryCount()); // ==> below loop is not a noop
+  {
+    auto range1 = archive1.iterEfficient();
+    auto range2 = archive2.iterEfficient();
+    for ( auto it1=range1.begin(), it2=range2.begin(); it1!=range1.end() && it2!=range2.end(); ++it1, ++it2 ) { // stops at the shorter range
+      auto& entry1 = *it1;
+      auto& entry2 = *it2;
+      ASSERT_EQ(entry1.getIndex(), entry2.getIndex());
+      ASSERT_EQ(entry1.getPath(), entry2.getPath());
+      ASSERT_EQ(entry1.getTitle(), entry2.getTitle());
+      ASSERT_EQ(entry1.isRedirect(), entry2.isRedirect());
+      if (!entry1.isRedirect()) { // getItem() is only called on non-redirect entries
+        auto item1 = entry1.getItem();
+        auto item2 = entry2.getItem();
+        ASSERT_EQ(item1.getMimetype(), item2.getMimetype());
+        ASSERT_EQ(item1.getSize(), item2.getSize());
+        ASSERT_EQ(item1.getData(), item2.getData());
+      }
+    }
+  }
+
+  {
+    auto range1 = archive1.iterByPath();
+    auto range2 = archive2.iterByPath();
+    for ( auto it1=range1.begin(), it2=range2.begin(); it1!=range1.end() && it2!=range2.end(); ++it1, ++it2 ) { // stops at the shorter range
+      auto& entry1 = *it1;
+      auto& entry2 = *it2;
+
+      ASSERT_EQ(entry1.getIndex(), entry2.getIndex());
+    }
+  }
+
+  {
+    auto range1 = archive1.iterByTitle();
+    auto range2 = archive2.iterByTitle();
+    for ( auto it1=range1.begin(), it2=range2.begin(); it1!=range1.end() && it2!=range2.end(); ++it1, ++it2 ) { // stops at the shorter range
+      auto& entry1 = *it1;
+      auto& entry2 = *it2;
+
+      ASSERT_EQ(entry1.getIndex(), entry2.getIndex());
+    }
+  }
+
+#if defined(ENABLE_XAPIAN)
+  if ( archive1.hasTitleIndex() )
+  {
+    // Resolve any potential redirect.
+    auto mainItem = mainEntry.getItem(true);
+    zim::SuggestionSearcher searcher1(archive1);
+    zim::SuggestionSearcher searcher2(archive2);
+    std::string query = mainItem.getTitle(); // suggest on the main item's title in both archives
+    auto search1 = searcher1.suggest(query);
+    auto search2 = searcher2.suggest(query);
+    ASSERT_NE(0, search1.getEstimatedMatches());
+    ASSERT_EQ(search1.getEstimatedMatches(), search2.getEstimatedMatches());
+
+    auto result1 = search1.getResults(0, archive1.getEntryCount());
+    auto result2 = search2.getResults(0, archive2.getEntryCount());
+    auto firstSearchItem1 = result1.begin().getEntry().getItem(true);
+    auto firstSearchItem2 = result2.begin().getEntry().getItem(true);
+    ASSERT_EQ(mainItem.getPath(), firstSearchItem1.getPath()); // the first suggestion must be the main item
+    ASSERT_EQ(mainItem.getPath(), firstSearchItem2.getPath());
+    ASSERT_EQ(result1.size(), result2.size());
+  }
+#endif
+}
+
+#if WITH_TEST_DATA
+TEST(ZimArchive, multipart)
+{
+  auto nonSplittedZims = getDataFilePath("wikibooks_be_all_nopic_2017-02.zim");
+  auto splittedZims = getDataFilePath("wikibooks_be_all_nopic_2017-02_splitted.zim");
+
+  ASSERT_EQ(nonSplittedZims.size(), splittedZims.size()) << "We must have same number of zim files. (This is a test data issue)";
+  for(auto i=0UL; i < nonSplittedZims.size(); i++) {
+    const zim::Archive archive1(nonSplittedZims[i].path);
+    const zim::Archive archive2(splittedZims[i].path);
+    ASSERT_FALSE(archive1.isMultiPart());
+    ASSERT_TRUE (archive2.isMultiPart());
+
+    checkEquivalence(archive1, archive2);
+  }
+}
+
+#ifdef _WIN32
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <io.h>
+#undef min
+#undef max
+# define OPEN_READ_ONLY(path) _open((path).c_str(), _O_RDONLY)
+#else
+# define OPEN_READ_ONLY(path) open((path).c_str(), O_RDONLY)
+#endif
+
+#ifndef _WIN32
+TEST(ZimArchive, openByFD) // An archive opened from a file descriptor must match the one opened by path.
+{
+  for(auto& testfile: getDataFilePath("small.zim")) {
+    const zim::Archive archive1(testfile.path);
+    const int fd = OPEN_READ_ONLY(testfile.path); // NOTE(review): fd is neither checked nor closed here - confirm who owns it
+    const zim::Archive archive2(fd);
+
+    checkEquivalence(archive1, archive2);
+  }
+}
+
+TEST(ZimArchive, openZIMFileEmbeddedInAnotherFile)
+{
+  auto normalZims = getDataFilePath("small.zim");
+  auto embeddedZims = getDataFilePath("small.zim.embedded");
+
+  ASSERT_EQ(normalZims.size(), embeddedZims.size()) << "We must have same number of zim files. (This is a test data issue)";
+  for(auto i=0UL; i < normalZims.size(); i++) {
+    const zim::Archive archive1(normalZims[i].path);
+    const int fd = OPEN_READ_ONLY(embeddedZims[i].path);
+    const zim::Archive archive2(fd, 8, archive1.getFilesize());
+
+    checkEquivalence(archive1, archive2);
+  }
+}
+#endif // not _WIN32
+#endif // WITH_TEST_DATA
+
+zim::Blob readItemData(const zim::Item::DirectAccessInfo& dai, zim::size_type size)
+{
+  zim::DEFAULTFS::FD fd(zim::DEFAULTFS::openFile(dai.first)); // dai.first: path of the file to read from
+  std::shared_ptr<char> data(new char[size], std::default_delete<char[]>()); // new[] must be released with delete[]
+  fd.readAt(data.get(), zim::zsize_t(size), zim::offset_t(dai.second)); // dai.second: byte offset inside that file
+  return zim::Blob(data, size); // the Blob is built from the shared buffer and its size
+}
+
+#if WITH_TEST_DATA
+TEST(ZimArchive, getDirectAccessInformation)
+{
+  for(auto& testfile:getDataFilePath("small.zim")) {
+    const zim::Archive archive(testfile.path);
+    zim::entry_index_type checkedItemCount = 0;
+    for ( auto entry : archive.iterEfficient() ) {
+      if (!entry.isRedirect()) {
+        const TestContext ctx{ {"entry", entry.getPath() } };
+        const auto item = entry.getItem();
+        const auto dai = item.getDirectAccessInformation();
+        if ( dai.first != "" ) {
+          ++checkedItemCount;
+          EXPECT_EQ(item.getData(), readItemData(dai, item.getSize())) << ctx;
+        }
+      }
+    }
+    ASSERT_NE(0, checkedItemCount);
+  }
+}
+
+#ifndef _WIN32
+TEST(ZimArchive, getDirectAccessInformationInAnArchiveOpenedByFD)
+{
+  for(auto& testfile:getDataFilePath("small.zim")) {
+    const int fd = OPEN_READ_ONLY(testfile.path);
+    const zim::Archive archive(fd);
+    zim::entry_index_type checkedItemCount = 0;
+    for ( auto entry : archive.iterEfficient() ) {
+      if (!entry.isRedirect()) {
+        const TestContext ctx{ {"entry", entry.getPath() } };
+        const auto item = entry.getItem();
+        const auto dai = item.getDirectAccessInformation();
+        if ( dai.first != "" ) {
+          ++checkedItemCount;
+          EXPECT_EQ(item.getData(), readItemData(dai, item.getSize())) << ctx;
+        }
+      }
+    }
+    ASSERT_NE(0, checkedItemCount);
+  }
+}
+
+TEST(ZimArchive, getDirectAccessInformationFromEmbeddedArchive)
+{
+  auto normalZims = getDataFilePath("small.zim");
+  auto embeddedZims = getDataFilePath("small.zim.embedded");
+
+  ASSERT_EQ(normalZims.size(), embeddedZims.size()) << "We must have same number of zim files. (This is a test data issue)";
+  for(auto i=0UL; i < normalZims.size(); i++) {
+    const int fd = OPEN_READ_ONLY(embeddedZims[i].path);
+    const auto size = zim::DEFAULTFS::openFile(normalZims[i].path).getSize();
+    const zim::Archive archive(fd, 8, size.v);
+    zim::entry_index_type checkedItemCount = 0;
+    for ( auto entry : archive.iterEfficient() ) {
+      if (!entry.isRedirect()) {
+        const TestContext ctx{ {"entry", entry.getPath() } };
+        const auto item = entry.getItem();
+        const auto dai = item.getDirectAccessInformation();
+        if ( dai.first != "" ) {
+          ++checkedItemCount;
+          EXPECT_EQ(item.getData(), readItemData(dai, item.getSize())) << ctx;
+        }
+      }
+    }
+    ASSERT_NE(0, checkedItemCount);
+  }
+}
+#endif // not _WIN32
+#endif // WITH_TEST_DATA
+
+} // unnamed namespace
diff --git a/test/bufferstreamer.cpp b/test/bufferstreamer.cpp
new file mode 100644
index 0000000..d432c7a
--- /dev/null
+++ b/test/bufferstreamer.cpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "buffer.h"
+#include "bufferstreamer.h"
+#include "endian_tools.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using namespace zim;
+
+////////////////////////////////////////////////////////////////////////////////
+// BufferStreamer
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(BufferStreamer, shouldJustWork) // Interleaves typed reads with raw skips over one 27-byte buffer.
+{
+  char data[] = "abcdefghijklmnopqrstuvwxyz"; // 26 letters + terminating NUL
+  zim::toLittleEndian(uint32_t(1234), data); // overwrites bytes 0..3 ("abcd")
+  zim::toLittleEndian(int64_t(-987654321), data+18); // overwrites bytes 18..25 ("stuvwxyz")
+
+  auto buffer = Buffer::makeBuffer(data, zsize_t(sizeof(data)));
+  zim::BufferStreamer bds(buffer, zsize_t(sizeof(data)));
+
+  ASSERT_EQ(1234, bds.read<uint32_t>());
+
+  ASSERT_EQ(data + 4, bds.current()); // the 4-byte read advanced the cursor
+  const auto blob1 = std::string(bds.current(), 4);
+  bds.skip(zsize_t(4));
+  ASSERT_EQ("efgh", blob1);
+
+  ASSERT_EQ(data + 8, bds.current()); // skip(4) advanced the cursor as well
+  const auto blob2 = std::string(bds.current(), 10);
+  bds.skip(zsize_t(10)); // cursor is now at byte 18, where the int64 was written
+  ASSERT_EQ("ijklmnopqr", blob2);
+
+  ASSERT_EQ(-987654321,   bds.read<int64_t>());
+}
+
+} // unnamed namespace
diff --git a/test/cluster.cpp b/test/cluster.cpp
new file mode 100644
index 0000000..bc59930
--- /dev/null
+++ b/test/cluster.cpp
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#if defined(_MSC_VER)
+# include <BaseTsd.h>
+  typedef SSIZE_T ssize_t;
+#else
+# include <unistd.h>
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <io.h>
+#include <fileapi.h>
+#undef min
+#undef max
+#endif
+
+#include "gtest/gtest.h"
+
+#include <zim/zim.h>
+#include <zim/writer/contentProvider.h>
+
+#include "../src/buffer.h"
+#include "../src/cluster.h"
+#include "../src/file_part.h"
+#include "../src/file_compound.h"
+#include "../src/buffer_reader.h"
+#include "../src/writer/cluster.h"
+#include "../src/endian_tools.h"
+#include "../src/config.h"
+
+#include "tools.h"
+
+namespace
+{
+
+using zim::unittests::TempFile;
+using zim::unittests::write_to_buffer;
+
+TEST(ClusterTest, create_cluster)
+{
+  // A freshly constructed writer cluster contains no blob.
+  zim::writer::Cluster cluster(zim::Compression::None);
+  ASSERT_EQ(cluster.count().v, 0U);
+
+  std::string digits("123456789012345678901234567890");
+  std::string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  std::string lower("abcdefghijklmnopqrstuvwxyz");
+  cluster.addContent(digits);
+  cluster.addContent(upper);
+  cluster.addContent(lower);
+
+  // Every added blob is counted and reports its own size.
+  ASSERT_EQ(cluster.count().v, 3U);
+  ASSERT_EQ(cluster.getBlobSize(zim::blob_index_t(0)).v, digits.size());
+  ASSERT_EQ(cluster.getBlobSize(zim::blob_index_t(1)).v, upper.size());
+  ASSERT_EQ(cluster.getBlobSize(zim::blob_index_t(2)).v, lower.size());
+}
+
+TEST(ClusterTest, read_write_cluster)
+{
+  // Round trip: serialise an uncompressed cluster, then parse it back.
+  std::string digits("123456789012345678901234567890");
+  std::string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  std::string lowerWithGap("abcdefghijklmnop vwxyz");
+
+  zim::writer::Cluster written(zim::Compression::None);
+  written.addContent(digits);
+  written.addContent(upper);
+  written.addContent(lowerWithGap);
+  written.close();
+
+  auto buffer = write_to_buffer(written);
+  const auto readBack = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
+  // The reader must report the layout the writer was given.
+  ASSERT_EQ(readBack->getCompression(), zim::Cluster::Compression::None);
+  ASSERT_EQ(readBack->isExtended, false);
+  ASSERT_EQ(readBack->count().v, 3U);
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(0)).v, digits.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(1)).v, upper.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(2)).v, lowerWithGap.size());
+}
+
+TEST(ClusterTest, read_write_no_content)
+{
+  // Empty cluster; "\3garbage" is presumably extra data added by write_to_buffer (confirm in tools.h).
+  zim::writer::Cluster emptyCluster(zim::Compression::None);
+  emptyCluster.close();
+  auto buffer = write_to_buffer(emptyCluster, "\3garbage");
+
+  const auto readBack = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
+  ASSERT_EQ(readBack->getCompression(), zim::Cluster::Compression::None);
+  ASSERT_EQ(readBack->isExtended, false);
+  ASSERT_EQ(readBack->count().v, 0U);
+}
+
+TEST(ClusterTest, read_write_empty)
+{
+  // Zero-length blobs must survive a write/read round trip as three distinct entries.
+  std::string nothing;
+  zim::writer::Cluster written(zim::Compression::None);
+  written.addContent(nothing);
+  written.addContent(nothing);
+  written.addContent(nothing);
+  written.close();
+
+  auto buffer = write_to_buffer(written);
+  const auto readBack = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
+
+  ASSERT_EQ(readBack->getCompression(), zim::Cluster::Compression::None);
+  ASSERT_EQ(readBack->isExtended, false);
+  ASSERT_EQ(readBack->count().v, 3U);
+  // Each entry reports a size of 0.
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(0)).v, 0U);
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(1)).v, 0U);
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(2)).v, 0U);
+}
+
+TEST(ClusterTest, read_write_clusterZstd)
+{
+  // Round trip through a Zstd-compressed cluster; sizes and content must both survive.
+  std::string digits("123456789012345678901234567890");
+  std::string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  std::string lower("abcdefghijklmnopqrstuvwxyz");
+
+  zim::writer::Cluster written(zim::Compression::Zstd);
+  written.addContent(digits);
+  written.addContent(upper);
+  written.addContent(lower);
+  written.close();
+
+  auto buffer = write_to_buffer(written);
+  const auto readBack = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
+  ASSERT_EQ(readBack->isExtended, false);
+  ASSERT_EQ(readBack->count().v, 3U);
+  ASSERT_EQ(readBack->getCompression(), zim::Cluster::Compression::Zstd);
+  // Blob sizes first, then the blob bytes themselves.
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(0)).v, digits.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(1)).v, upper.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(2)).v, lower.size());
+  ASSERT_EQ(digits, std::string(readBack->getBlob(zim::blob_index_t(0))));
+  ASSERT_EQ(upper, std::string(readBack->getBlob(zim::blob_index_t(1))));
+  ASSERT_EQ(lower, std::string(readBack->getBlob(zim::blob_index_t(2))));
+}
+
+class FakeProvider : public zim::writer::ContentProvider
+{
+  // Provides `size` zero bytes in chunks of at most CHUNK_SIZE; one zeroed buffer backs every chunk.
+  public:
+    explicit FakeProvider(zim::size_type size)
+      : size(size),
+        offset(0),
+        buffer(new char[CHUNK_SIZE]())  // trailing () value-initialises the array to zeros
+    {}
+
+    zim::size_type getSize() const override { return size; }
+    // Returns the next chunk; the blob is empty once `size` bytes have been handed out.
+    zim::Blob feed() override {
+      const auto outSize = std::min(zim::size_type(CHUNK_SIZE), size-offset);
+      offset += outSize;
+      return zim::Blob(buffer.get(), outSize);
+    }
+
+  private:
+    enum : size_t { CHUNK_SIZE = 1024*1024U };
+    zim::size_type size;
+    zim::offset_type offset;
+    std::unique_ptr<char[]> buffer;
+};
+
+TEST(ClusterTest, read_write_extended_cluster)
+{
+  // zim::writer does not support 32-bit architectures.
+  if (SIZE_MAX == UINT32_MAX) {
+    return;
+  }
+
+  // The test is skipped when SKIP_BIG_MEMORY_TEST=1 is set in the environment.
+  const char* skipFlag = std::getenv("SKIP_BIG_MEMORY_TEST");
+  if (skipFlag != nullptr && std::strcmp(skipFlag, "1") == 0) {
+    std::cout << "Skip big memory test" << std::endl;
+    return;
+  }
+
+  // Memory used so far: ~0.
+  std::string digits("123456789012345678901234567890");
+  std::string upper("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  std::string lower("abcdefghijklmnopqrstuvwxyz");
+  std::string reversed("zyxwvutsrqponmlkjihgfedcba");
+  const uint64_t FOUR_GIB = 4LL * 1024LL*1024LL*1024LL;
+  zim::size_type almost_4g = FOUR_GIB - 16;
+
+  zim::writer::Cluster written(zim::Compression::None);
+  written.addContent(digits);
+  written.addContent(upper);
+  written.addContent(lower);
+  written.addContent(std::unique_ptr<zim::writer::ContentProvider>(new FakeProvider(almost_4g)));
+  written.addContent(reversed);
+
+  // The almost-4GiB blob plus the small ones exceed 4GiB; the writer must flag the cluster as extended.
+  ASSERT_GT(written.size().v, FOUR_GIB);
+  ASSERT_EQ(written.is_extended(), true);
+
+  // Memory used from here on: more than 4GiB (the serialised cluster).
+  auto buffer = write_to_buffer(written);
+  const auto readBack = zim::Cluster::read(zim::BufferReader(buffer), zim::offset_t(0));
+
+  ASSERT_EQ(readBack->isExtended, true);
+  ASSERT_EQ(readBack->count().v, 5U);
+  ASSERT_EQ(readBack->getCompression(), zim::Cluster::Compression::None);
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(0)).v, digits.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(1)).v, upper.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(2)).v, lower.size());
+  ASSERT_EQ(readBack->getBlobSize(zim::blob_index_t(3)).v, almost_4g);
+  ASSERT_EQ(digits, std::string(readBack->getBlob(zim::blob_index_t(0))));
+  ASSERT_EQ(upper, std::string(readBack->getBlob(zim::blob_index_t(1))));
+  ASSERT_EQ(lower, std::string(readBack->getBlob(zim::blob_index_t(2))));
+  ASSERT_EQ(reversed, std::string(readBack->getBlob(zim::blob_index_t(4))));
+}
+
+
+TEST(ClusterTest, read_extended_cluster)
+{
+  char* SKIP_BIG_MEMORY_TEST = std::getenv("SKIP_BIG_MEMORY_TEST");
+  if (SKIP_BIG_MEMORY_TEST != nullptr && std::string(SKIP_BIG_MEMORY_TEST) == "1") {
+    std::cout << "Skip big memory test" << std::endl;
+    return;
+  }
+
+  // The cluster is written by hand into a temporary file. Every write (and the seek) is
+  // asserted, so an I/O failure stops the test at once instead of surfacing as a parse error.
+  TempFile tmpfile("extended_cluster");
+  int fd = tmpfile.fd();
+
+  std::string blob0("123456789012345678901234567890");
+  std::string blob1("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+  std::string blob2("abcdefghijklmnopqrstuvwxyz");
+  zim::size_type bigger_than_4g = 1024LL*1024LL*1024LL*4LL+1024LL;
+
+  // Leading info byte; the assertions at the end expect it to be decoded as
+  // "extended, not compressed".
+  char a = 0x11;
+  ASSERT_EQ(write(fd, &a, 1), 1);
+
+  // Offset table: five 64-bit little-endian offsets delimiting four blobs. The
+  // first offset is the size of the table itself (5 entries of 8 bytes).
+  zim::offset_type offset = 5*sizeof(uint64_t);
+  char out_buf[sizeof(uint64_t)];
+  const ssize_t entry_size = sizeof(out_buf);
+
+  zim::toLittleEndian(offset, out_buf);
+  ASSERT_EQ(write(fd, out_buf, sizeof(out_buf)), entry_size);
+
+  offset += blob0.size();
+  zim::toLittleEndian(offset, out_buf);
+  ASSERT_EQ(write(fd, out_buf, sizeof(out_buf)), entry_size);
+
+  offset += blob1.size();
+  zim::toLittleEndian(offset, out_buf);
+  ASSERT_EQ(write(fd, out_buf, sizeof(out_buf)), entry_size);
+
+  offset += blob2.size();
+  zim::toLittleEndian(offset, out_buf);
+  ASSERT_EQ(write(fd, out_buf, sizeof(out_buf)), entry_size);
+
+  offset += bigger_than_4g;
+  zim::toLittleEndian(offset, out_buf);
+  ASSERT_EQ(write(fd, out_buf, sizeof(out_buf)), entry_size);
+
+  // Content of the three small blobs.
+  ASSERT_EQ(write(fd, blob0.c_str(), blob0.size()), (ssize_t)blob0.size());
+  ASSERT_EQ(write(fd, blob1.c_str(), blob1.size()), (ssize_t)blob1.size());
+  ASSERT_EQ(write(fd, blob2.c_str(), blob2.size()), (ssize_t)blob2.size());
+
+  // The big blob is not written byte by byte: seek to its last byte and write
+  // only that one, which gives the file the required length.
+#ifdef _WIN32
+# define LSEEK _lseeki64
+#else
+# define LSEEK lseek
+#endif
+  ASSERT_NE(LSEEK(fd , bigger_than_4g-1, SEEK_CUR), -1);
+#undef LSEEK
+  a = '\0';
+  ASSERT_EQ(write(fd, &a, 1), 1);
+  tmpfile.close();
+
+  auto fileCompound = std::make_shared<zim::FileCompound>(tmpfile.path());
+  const auto cluster2shptr = zim::Cluster::read(zim::MultiPartFileReader(fileCompound), zim::offset_t(0));
+  zim::Cluster& cluster2 = *cluster2shptr;
+  ASSERT_EQ(cluster2.isExtended, true);
+  ASSERT_EQ(cluster2.count().v, 4U);
+  ASSERT_EQ(cluster2.getCompression(), zim::Cluster::Compression::None);
+  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(0)).v, blob0.size());
+  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(1)).v, blob1.size());
+  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(2)).v, blob2.size());
+  ASSERT_EQ(cluster2.getBlobSize(zim::blob_index_t(3)).v, bigger_than_4g);
+
+  ASSERT_EQ(blob0, std::string(cluster2.getBlob(zim::blob_index_t(0))));
+  ASSERT_EQ(blob1, std::string(cluster2.getBlob(zim::blob_index_t(1))));
+  ASSERT_EQ(blob2, std::string(cluster2.getBlob(zim::blob_index_t(2))));
+
+  // Where size_t is 32 bits wide the big blob is expected to come back empty.
+  const zim::Blob b = cluster2.getBlob(zim::blob_index_t(3));
+  if (SIZE_MAX == UINT32_MAX) {
+    ASSERT_EQ(b.data(), nullptr);
+    ASSERT_EQ(b.size(), 0U);
+  } else {
+    ASSERT_EQ(b.size(), bigger_than_4g);
+  }
+}
+
+
+}  // namespace
diff --git a/test/compression.cpp b/test/compression.cpp
new file mode 100644
index 0000000..a87d540
--- /dev/null
+++ b/test/compression.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <algorithm>
+#include <memory>
+#include "gtest/gtest.h"
+
+#include <zim/zim.h>
+
+#include "../src/compression.h"
+
+namespace
+{
+
+template<typename T>
+class CompressionTest : public testing::Test {
+  protected:  // aliases reached by the typed tests through TestFixture::
+    using CompressorT = zim::Compressor<T>;
+    using DecompressorT = zim::Uncompressor<T>;
+};
+
+using CompressionAlgo = ::testing::Types<  // compression types the typed tests are instantiated for
+  ZSTD_INFO
+>;
+
+TYPED_TEST_CASE(CompressionTest, CompressionAlgo);
+
+TYPED_TEST(CompressionTest, compress) {
+  // 100000 bytes cycling through every byte value; the last one is forced to 0.
+  std::string data;
+  data.reserve(100000);
+  for (int i=0; i<100000; i++) {
+    data.push_back(static_cast<char>(i%256));
+  }
+  data[99999] = 0;
+
+  // Round-trip the data for every (initial size, feed chunk size) combination.
+  const std::vector<unsigned int> initialSizes{32, 1024, 1024*1024};
+  const std::vector<unsigned long> chunkSizes{32, 512, 1024*1024};
+  for (auto initialSize: initialSizes) {
+    for (auto chunkSize: chunkSizes) {
+      // Compress, feeding the input in pieces of at most chunkSize bytes.
+      typename TestFixture::CompressorT compressor(initialSize);
+      {
+        unsigned long remaining = data.size();
+        size_t offset = 0;
+        while (remaining) {
+          if (offset == 0) {  // first chunk: initialise the stream
+            compressor.init(const_cast<char*>(data.c_str()));
+          }
+          const auto step = std::min(remaining, chunkSize);
+          compressor.feed(data.c_str()+offset, step);
+          offset += step;
+          remaining -= step;
+        }
+      }
+
+      zim::zsize_t comp_size;
+      auto comp_data = compressor.get_data(&comp_size);
+
+      // Feed the compressed bytes back, chunked the same way.
+      typename TestFixture::DecompressorT decompressor(initialSize);
+      {
+        unsigned long remaining = comp_size.v;
+        size_t offset = 0;
+        while (remaining) {
+          if (offset == 0) {
+            decompressor.init(comp_data.get());
+          }
+          const auto step = std::min(remaining, chunkSize);
+          decompressor.feed(comp_data.get()+offset, step);
+          offset += step;
+          remaining -= step;
+        }
+      }
+
+      zim::zsize_t decomp_size;
+      auto decomp_data = decompressor.get_data(&decomp_size);
+
+      ASSERT_EQ(decomp_size.v, data.size());
+      ASSERT_EQ(data, std::string(decomp_data.get(), decomp_size.v));
+    }
+  }
+}
+
+
+}  // namespace
diff --git a/test/creator.cpp b/test/creator.cpp
new file mode 100644
index 0000000..c43270c
--- /dev/null
+++ b/test/creator.cpp
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/zim.h>
+#include <zim/writer/creator.h>
+#include <zim/writer/item.h>
+#include <zim/writer/contentProvider.h>
+#include <zim/archive.h>
+
+#include "tools.h"
+#include "../src/file_compound.h"
+#include "../src/file_reader.h"
+#include "../src/direntreader.h"
+#include "../src/dirent_accessor.h"
+#include "../src/_dirent.h"
+#include "../src/fileheader.h"
+#include "../src/cluster.h"
+#include "../src/rawstreamreader.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using namespace zim;
+
+struct NoneType {};  // tag type meaning "no expectation"
+const NoneType None;  // pass None where a dirent field must not be checked
+
+template<typename T>
+struct Optional{  // expected value for a field, or "don't care" when built from None
+  Optional(NoneType) : active(false), value() {}  // value-initialised: copies never read an indeterminate T
+  Optional(T value) : active(true), value(value) {}  // implicit, so call sites can pass plain values
+  void check(const T& actual) { if (active) { ASSERT_EQ(this->value, actual); } }  // no-op when inactive
+  bool active;  // true when there is an expectation to check
+  T    value;   // the expectation; only meaningful when active
+};
+
+template<>
+struct Optional<const std::string> {  // string flavour: also built from C string literals
+  Optional(NoneType) : active(false) {}
+  Optional(std::string value) : active(true), value(value) {}
+  Optional(const char* value) : active(true), value(value) {}
+  void check(const std::string& actual) { if (active) { ASSERT_EQ(this->value, actual); } }
+  bool active;
+  std::string value;
+};
+
+void test_article_dirent(  // asserts that dirent is an article matching the given expectations
+  std::shared_ptr<const Dirent> dirent,
+  Optional<char> ns,
+  Optional<const std::string> url,
+  Optional<const std::string> title,
+  Optional<uint16_t> mimetype,
+  Optional<cluster_index_t> clusterNumber,
+  Optional<blob_index_t> blobNumber)
+{
+  ASSERT_TRUE(dirent->isArticle());  // on failure only this helper returns; the calling test continues
+  ns.check(dirent->getNamespace());  // each check() is a no-op for an expectation given as None
+  url.check(dirent->getUrl());
+  title.check(dirent->getTitle());
+  mimetype.check(dirent->getMimeType());
+  clusterNumber.check(dirent->getClusterNumber());
+  blobNumber.check(dirent->getBlobNumber());
+}
+
+void test_redirect_dirent(  // asserts that dirent is a redirect matching the given expectations
+  std::shared_ptr<const Dirent> dirent,
+  Optional<char> ns,
+  Optional<const std::string> url,
+  Optional<const std::string> title,
+  Optional<entry_index_t> target)
+{
+  ASSERT_TRUE(dirent->isRedirect());  // on failure only this helper returns; the calling test continues
+  ns.check(dirent->getNamespace());  // each check() is a no-op for an expectation given as None
+  url.check(dirent->getUrl());
+  title.check(dirent->getTitle());
+  target.check(dirent->getRedirectIndex());
+}
+
+TEST(ZimCreator, DoNothing)
+{
+  // Constructing a Creator and destroying it without ever using it must not crash.
+  writer::Creator creator;
+}
+
+TEST(ZimCreator, createEmptyZim)
+{
+  unittests::TempFile temp("emptyzimfile");
+  auto tempPath = temp.path();
+  zim::Uuid uuid;
+  // Put special characters ('\n', '\0') in the uuid to make sure they get no special treatment.
+  uuid.data[5] = '\n';
+  uuid.data[10] = '\0';
+
+  writer::Creator creator;
+  creator.setUuid(uuid);
+  creator.startZimCreation(tempPath);
+  creator.finishZimCreation();
+
+  // Inspect the low-level structures directly instead of going through the high-level Archive.
+  auto fileCompound = std::make_shared<FileCompound>(tempPath);
+  auto reader = std::make_shared<MultiPartFileReader>(fileCompound);
+  Fileheader header;
+  header.read(*reader);
+  ASSERT_FALSE(header.hasMainPage());
+  ASSERT_EQ(header.getArticleCount(), 2); // Counter + listing/titleOrdered/v0
+
+  // Read the two dirents that exist even in an empty archive.
+  auto urlPtrReader = reader->sub_reader(offset_t(header.getUrlPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount()));
+  DirectDirentAccessor direntAccessor(std::make_shared<DirentReader>(reader), std::move(urlPtrReader), entry_index_t(header.getArticleCount()));
+  std::shared_ptr<const Dirent> dirent;
+
+  dirent = direntAccessor.getDirent(entry_index_t(0));
+  test_article_dirent(dirent, 'M', "Counter", None, 1, cluster_index_t(0), None);
+
+  dirent = direntAccessor.getDirent(entry_index_t(1));
+  test_article_dirent(dirent, 'X', "listing/titleOrdered/v0", None, 0, cluster_index_t(1), None);
+  auto v0BlobIndex = dirent->getBlobNumber();
+  // clusterPtrPos+8: presumably the pointer to cluster 1 (assumes 8-byte cluster pointers - confirm).
+  auto clusterPtrPos = header.getClusterPtrPos();
+  auto clusterOffset = offset_t(reader->read_uint<offset_type>(offset_t(clusterPtrPos+8)));
+  auto cluster = Cluster::read(*reader, clusterOffset);
+  ASSERT_EQ(cluster->getCompression(), Cluster::Compression::None);
+  ASSERT_EQ(cluster->count(), blob_index_t(1)); // Only listing/titleOrdered/v0
+  auto blob = cluster->getBlob(v0BlobIndex);
+  ASSERT_EQ(blob.size(), 2*sizeof(title_index_t));  // presumably one title_index_t per entry
+}
+
+
+class TestItem : public writer::Item
+{
+  // Minimal in-memory item: fixed "text/html" mimetype, hinted as a front article.
+  public:
+    TestItem(const std::string& path, const std::string& title, const std::string& content)
+      : path(path), title(title), content(content)
+    {}
+    virtual ~TestItem() = default;
+    virtual std::string getPath() const { return path; }
+    virtual std::string getTitle() const { return title; }
+    virtual std::string getMimeType() const { return "text/html"; }
+    virtual writer::Hints getHints() const { return { { writer::FRONT_ARTICLE, 1 } }; }
+    virtual std::unique_ptr<writer::ContentProvider> getContentProvider() const {
+      // The content is served through a StringProvider built from `content`.
+      return std::unique_ptr<writer::ContentProvider>(new writer::StringProvider(content));
+    }
+    std::string path;
+    std::string title;
+    std::string content;
+};
+
+TEST(ZimCreator, createZim)
+{
+  unittests::TempFile temp("zimfile");
+  auto tempPath = temp.path();
+  zim::Uuid uuid;
+  // Put special characters ('\n', '\0') in the uuid to make sure they get no special treatment.
+  uuid.data[5] = '\n';
+  uuid.data[10] = '\0';
+
+  writer::Creator creator;
+  creator.setUuid(uuid);
+  creator.configIndexing(true, "eng");
+  creator.startZimCreation(tempPath);
+  creator.addRedirection("foo", "WrongRedirection", "foobar", {{zim::writer::FRONT_ARTICLE, true}}); // Will be replaced by the item added below
+  auto item = std::make_shared<TestItem>("foo", "Foo", "FooContent");
+  EXPECT_NO_THROW(creator.addItem(item));
+  EXPECT_THROW(creator.addItem(item), std::runtime_error);  // the very same item added a second time
+  // Make sure the title order differs from the url order.
+  item = std::make_shared<TestItem>("foo2", "AFoo", "Foo2Content");
+  creator.addItem(item);
+  creator.addMetadata("Title", "This is a title");
+  creator.addIllustration(48, "PNGBinaryContent48");
+  creator.addIllustration(96, "PNGBinaryContent96");
+  creator.setMainPath("foo");
+  creator.addRedirection("foo3", "FooRedirection", "foo"); // Not a front article.
+  creator.addRedirection("foo4", "FooRedirection", "NoExistant", {{zim::writer::FRONT_ARTICLE, true}}); // Invalid redirection, must be removed by the creator
+  creator.finishZimCreation();
+
+  // Inspect the low-level structures directly instead of going through the high-level Archive.
+  auto fileCompound = std::make_shared<FileCompound>(tempPath);
+  auto reader = std::make_shared<MultiPartFileReader>(fileCompound);
+  Fileheader header;
+  header.read(*reader);
+  ASSERT_TRUE(header.hasMainPage());
+#if defined(ENABLE_XAPIAN)
+  entry_index_type nb_entry = 12; // counter + 2*illustration + xapiantitleIndex + xapianfulltextIndex + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
+  int xapian_mimetype = 0;
+  int listing_mimetype = 1;
+  int png_mimetype = 2;
+  int html_mimetype = 3;
+  int plain_mimetype = 4;
+  int plainutf8_mimetype = 5;
+#else
+  entry_index_type nb_entry = 10; // counter + 2*illustration + foo + foo2 + foo3 + Title + mainPage + titleListIndexes*2
+  int listing_mimetype = 0;
+  int png_mimetype = 1;
+  int html_mimetype = 2;
+  int plain_mimetype = 3;
+  int plainutf8_mimetype = 4;
+#endif
+
+  ASSERT_EQ(header.getArticleCount(), nb_entry);
+
+  // Read the dirents one after the other through the url pointer list.
+  auto urlPtrReader = reader->sub_reader(offset_t(header.getUrlPtrPos()), zsize_t(sizeof(offset_t)*header.getArticleCount()));
+  DirectDirentAccessor direntAccessor(std::make_shared<DirentReader>(reader), std::move(urlPtrReader), entry_index_t(header.getArticleCount()));
+  std::shared_ptr<const Dirent> dirent;
+
+  entry_index_type direntIdx = 0;
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'C', "foo", "Foo", html_mimetype, cluster_index_t(0), None);
+  auto fooBlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'C', "foo2", "AFoo", html_mimetype, cluster_index_t(0), None);
+  auto foo2BlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_redirect_dirent(dirent, 'C', "foo3", "FooRedirection", entry_index_t(0));
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'M', "Counter", None, plain_mimetype, cluster_index_t(0), None);
+  auto counterBlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'M', "Illustration_48x48@1", None, png_mimetype, cluster_index_t(1), None);
+  auto illustration48BlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'M', "Illustration_96x96@1", None, png_mimetype, cluster_index_t(1), None);
+  auto illustration96BlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'M', "Title", "Title", plainutf8_mimetype, cluster_index_t(0), None);
+  auto titleBlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_redirect_dirent(dirent, 'W', "mainPage", "mainPage", entry_index_t(0));
+
+#if defined(ENABLE_XAPIAN)
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'X', "fulltext/xapian", "fulltext/xapian", xapian_mimetype, cluster_index_t(1), None);
+#endif
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'X', "listing/titleOrdered/v0", None, listing_mimetype, cluster_index_t(1), None);
+  auto v0BlobIndex = dirent->getBlobNumber();
+
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'X', "listing/titleOrdered/v1", None, listing_mimetype, cluster_index_t(1), None);
+  auto v1BlobIndex = dirent->getBlobNumber();
+
+#if defined(ENABLE_XAPIAN)
+  dirent = direntAccessor.getDirent(entry_index_t(direntIdx++));
+  test_article_dirent(dirent, 'X', "title/xapian", "title/xapian", xapian_mimetype, cluster_index_t(1), None);
+#endif
+
+  auto clusterPtrPos = header.getClusterPtrPos();
+
+  // Test main content (cluster 0: the compressed entries)
+  auto clusterOffset = offset_t(reader->read_uint<offset_type>(offset_t(clusterPtrPos)));
+  auto cluster = Cluster::read(*reader, clusterOffset);
+  ASSERT_EQ(cluster->getCompression(), Cluster::Compression::Zstd);
+  ASSERT_EQ(cluster->count(), blob_index_t(4)); // 4 entries are compressed content
+
+  auto blob = cluster->getBlob(fooBlobIndex);
+  ASSERT_EQ(std::string(blob), "FooContent");
+
+  blob = cluster->getBlob(foo2BlobIndex);
+  ASSERT_EQ(std::string(blob), "Foo2Content");
+
+  blob = cluster->getBlob(titleBlobIndex);
+  ASSERT_EQ(std::string(blob), "This is a title");
+
+  blob = cluster->getBlob(counterBlobIndex);
+  ASSERT_EQ(std::string(blob), "text/html=2");  // presumably counts the two TestItems, foo and foo2
+
+
+  // Test listing content (cluster 1; "+ 8" assumes 8-byte cluster pointers - confirm)
+  clusterOffset = offset_t(reader->read_uint<offset_type>(offset_t(clusterPtrPos + 8)));
+  cluster = Cluster::read(*reader, clusterOffset);
+  ASSERT_EQ(cluster->getCompression(), Cluster::Compression::None);
+  ASSERT_EQ(cluster->count(), blob_index_t(nb_entry-6)); // 6 entries are either compressed or redirections
+
+  ASSERT_EQ(header.getTitleIdxPos(), (clusterOffset+cluster->getBlobOffset(v0BlobIndex)).v);
+
+  blob = cluster->getBlob(v0BlobIndex);
+  ASSERT_EQ(blob.size(), nb_entry*sizeof(title_index_t));
+  std::vector<char> blob0Data(blob.data(), blob.end());
+  std::vector<char> expectedBlob0Data = {
+    1, 0, 0, 0,
+    0, 0, 0, 0,
+    2, 0, 0, 0,
+    3, 0, 0, 0,
+    4, 0, 0, 0,
+    5, 0, 0, 0,
+    6, 0, 0, 0,
+    7, 0, 0, 0,
+    8, 0, 0, 0,
+    9, 0, 0, 0
+#if defined(ENABLE_XAPIAN)
+    ,10, 0, 0, 0
+    ,11, 0, 0, 0
+#endif
+    };
+  ASSERT_EQ(blob0Data, expectedBlob0Data);
+
+  blob = cluster->getBlob(v1BlobIndex);
+  ASSERT_EQ(blob.size(), 2*sizeof(title_index_t));
+  std::vector<char> blob1Data(blob.data(), blob.end());
+  std::vector<char> expectedBlob1Data = {
+    1, 0, 0, 0,
+    0, 0, 0, 0
+  };
+  ASSERT_EQ(blob1Data, expectedBlob1Data);
+
+  blob = cluster->getBlob(illustration48BlobIndex);
+  ASSERT_EQ(std::string(blob), "PNGBinaryContent48");
+
+  blob = cluster->getBlob(illustration96BlobIndex);
+  ASSERT_EQ(std::string(blob), "PNGBinaryContent96");
+}
+
+
+TEST(ZimCreator, interruptedZimCreation)
+{
+  // Item payload: the decimal numbers 0..12344 concatenated.
+  std::ostringstream digits;
+  for ( size_t n = 0; n < 12345; ++n ) {
+    digits << n;
+  }
+  const std::string content(digits.str());
+
+  unittests::TempFile tmpFile("zimfile");
+  {
+    // Add 26 items ("a".."z"), then let the creator go out of scope
+    // without calling finishZimCreation().
+    writer::Creator creator;
+    creator.configClusterSize(16*1024);
+    creator.startZimCreation(tmpFile.path());
+    for ( char c = 'a'; c <= 'z'; ++c ) {
+      const std::string name(1, c);
+      creator.addItem(std::make_shared<TestItem>(name, name, content));
+    }
+  }
+
+  // The unfinished file must be rejected when opened as an archive.
+  EXPECT_THROW({ const zim::Archive archive(tmpFile.path()); },
+               zim::ZimFileFormatError);
+}
+
+
+} // unnamed namespace
diff --git a/test/decoderstreamreader.cpp b/test/decoderstreamreader.cpp
new file mode 100644
index 0000000..aa740b4
--- /dev/null
+++ b/test/decoderstreamreader.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "decoderstreamreader.h"
+#include "buffer_reader.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+template<class CompressionInfo>
+std::string
+compress(const std::string& data)
+{
+  zim::zsize_t compressedSize;  // filled in by get_data()
+  zim::Compressor<CompressionInfo> encoder(data.size());
+  encoder.init(const_cast<char*>(data.c_str()));
+  encoder.feed(data.c_str(), data.size());  // whole input in a single call
+  const auto compressed = encoder.get_data(&compressedSize);
+  return std::string(compressed.get(), compressedSize.v);
+}
+
+std::string operator*(const std::string& s, unsigned N)
+{
+  // Concatenates N copies of s; the result is empty when N is 0.
+  std::string repeated;
+  while (N-- > 0) repeated.append(s);
+  return repeated;
+}
+
+std::string toString(const zim::Buffer& buffer)
+{
+  return std::string(buffer.data(), buffer.size().v);  // copies size() bytes, so embedded NULs are kept
+}
+
+template<typename T>
+class DecoderStreamReaderTest : public testing::Test {
+  protected:  // exposes the compression under test as TestFixture::CompressionInfo
+    using CompressionInfo = T;
+};
+
+using CompressionTypes = ::testing::Types<  // compression types the typed tests are instantiated for
+  ZSTD_INFO
+>;
+
+TYPED_TEST_CASE(DecoderStreamReaderTest, CompressionTypes);
+
+TYPED_TEST(DecoderStreamReaderTest, justCompressedData) {
+  using CompressionInfo = typename TestFixture::CompressionInfo;
+
+  // Compress N copies of one sentence, then read them back one copy at a time.
+  const int N = 10;
+  const std::string sentence("DecoderStreamReader should work correctly");
+  const std::string compressed = compress<CompressionInfo>(sentence*N);
+  auto compData = zim::Buffer::makeBuffer(compressed.data(), zim::zsize_t(compressed.size()));
+  auto compReader = std::make_shared<zim::BufferReader>(compData);
+  zim::DecoderStreamReader<CompressionInfo> dds(compReader);
+  for (int i=0; i<N; i++)
+  {
+    auto piece = dds.sub_reader(zim::zsize_t(sentence.size()));
+    ASSERT_EQ(sentence, toString(piece->get_buffer(zim::offset_t(0), zim::zsize_t(sentence.size())))) << "i: " << i;
+  }
+}
+
+TYPED_TEST(DecoderStreamReaderTest, compressedDataFollowedByGarbage) {
+  using CompressionInfo = typename TestFixture::CompressionInfo;
+
+  const int N = 10;
+  const std::string sentence("DecoderStreamReader should work correctly");
+  // Ten NUL bytes follow the compressed stream; reading the N copies must not be disturbed.
+  std::string padded = compress<CompressionInfo>(sentence*N);
+  padded.append(10, '\0');
+
+  auto compData = zim::Buffer::makeBuffer(padded.data(), zim::zsize_t(padded.size()));
+  auto compReader = std::make_shared<zim::BufferReader>(compData);
+  zim::DecoderStreamReader<CompressionInfo> dds(compReader);
+  for (int i=0; i<N; i++)
+  {
+    auto piece = dds.sub_reader(zim::zsize_t(sentence.size()));
+    ASSERT_EQ(sentence, toString(piece->get_buffer(zim::offset_t(0), zim::zsize_t(sentence.size())))) << "i: " << i;
+  }
+}
+
+} // unnamed namespace
diff --git a/test/defaultIndexdata.cpp b/test/defaultIndexdata.cpp
new file mode 100644
index 0000000..d24ec83
--- /dev/null
+++ b/test/defaultIndexdata.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+
+#include <zim/writer/contentProvider.h>
+
+#include "../src/writer/defaultIndexData.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+  // Wrap `content` in a StringProvider and expose the resulting DefaultIndexData as an IndexData.
+  std::unique_ptr<zim::writer::IndexData> index_data(const std::string& content, const std::string& title) {
+    std::unique_ptr<zim::writer::ContentProvider> provider(new zim::writer::StringProvider(content));
+    return std::unique_ptr<zim::writer::IndexData>(new zim::writer::DefaultIndexData(std::move(provider), title));
+  }
+
+  TEST(DefaultIndexdata, empty) {
+    const auto indexData = index_data("", "A Title");
+    // Empty content: nothing to index, while the title is still reported in lower case.
+    ASSERT_FALSE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "");
+    ASSERT_EQ(indexData->getKeywords(), "");
+    ASSERT_EQ(indexData->getWordCount(), 0);
+    ASSERT_EQ(indexData->getGeoPosition(), std::make_tuple(false, 0, 0));
+  }
+
+  TEST(DefaultIndexdata, simple) {
+    const auto indexData = index_data("<html><body>Some <b>bold</b> words</body><html>", "A Title");
+    // Markup is stripped and the remaining text is lowercased; three words are counted.
+    ASSERT_TRUE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "some bold words");
+    ASSERT_EQ(indexData->getKeywords(), "");
+    ASSERT_EQ(indexData->getWordCount(), 3);
+    ASSERT_EQ(indexData->getGeoPosition(), std::make_tuple(false, 0, 0));
+  }
+
+  TEST(DefaultIndexdata, noindexhead) {
+    const auto indexData = index_data(R"(<html><head><meta name="robots" content="noindex"></head><body>Some <b>bold</b> words</body><html>)", "A Title");
+    // A robots "noindex" meta tag in the head: no index data and no extracted content.
+    ASSERT_FALSE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "");
+    ASSERT_EQ(indexData->getKeywords(), "");
+    ASSERT_EQ(indexData->getWordCount(), 0);
+    ASSERT_EQ(indexData->getGeoPosition(), std::make_tuple(false, 0, 0));
+  }
+
+  TEST(DefaultIndexdata, noindexnone) {
+    const auto indexData = index_data(R"(<html><head><meta name="robots" content="none"></head><body>Some <b>bold</b> words</body><html>)", "A Title");
+    // A robots "none" meta tag is expected to behave exactly like "noindex".
+    ASSERT_FALSE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "");
+    ASSERT_EQ(indexData->getKeywords(), "");
+    ASSERT_EQ(indexData->getWordCount(), 0);
+    ASSERT_EQ(indexData->getGeoPosition(), std::make_tuple(false, 0, 0));
+  }
+
+ TEST(DefaultIndexdata, noindexbody) {
+    const auto indexData = index_data("<html><body>NOINDEXSome <b>bold</b> words</body><html>", "A Title");
+    // NOINDEX marker inside the body: text is still extracted, but hasIndexData() reports false.
+    ASSERT_FALSE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "noindexsome bold words");
+    ASSERT_EQ(indexData->getKeywords(), "");
+    ASSERT_EQ(indexData->getWordCount(), 3);
+    ASSERT_EQ(indexData->getGeoPosition(), std::make_tuple(false, 0, 0));
+  }
+
+  TEST(DefaultIndexdata, full) {
+    auto indexData = index_data(R"(<html><head><meta name="keywords" content="some keyword important"><meta name="geo.position" content="45.005;10.100"></head><body>Some <b>bold</b> words</body><html>)", "A Title");
+    // Keywords and geo.position come from the <meta> tags; content comes from the body.
+    ASSERT_TRUE(indexData->hasIndexData());
+    ASSERT_EQ(indexData->getTitle(), "a title");
+    ASSERT_EQ(indexData->getContent(), "some bold words");
+    ASSERT_EQ(indexData->getKeywords(), "some keyword important");
+    ASSERT_EQ(indexData->getWordCount(), 3);
+    auto geoPos = indexData->getGeoPosition();
+    ASSERT_TRUE(std::get<0>(geoPos));
+    // ASSERT_NEAR needs no std::abs (this file never includes <cmath>) and prints both values on failure.
+    ASSERT_NEAR(std::get<1>(geoPos), 45.005, 0.00001);
+    ASSERT_NEAR(std::get<2>(geoPos), 10.1, 0.00001);
+  }
+}
diff --git a/test/dirent.cpp b/test/dirent.cpp
new file mode 100644
index 0000000..5723abd
--- /dev/null
+++ b/test/dirent.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <memory>
+#include <stdexcept>
+
+#ifdef _WIN32
+#include <windows.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <io.h>
+#include <fileapi.h>
+#endif
+
+#include "gtest/gtest.h"
+
+#include "../src/buffer.h"
+#include "../src/_dirent.h"
+#include "../src/direntreader.h"
+#include "../src/buffer_reader.h"
+#include "../src/writer/_dirent.h"
+
+#include "tools.h"
+
+namespace
+{
+
+using zim::unittests::TempFile;
+using zim::unittests::write_to_buffer;
+using zim::writer::NS;
+
+// Parse the dirent stored at offset 0 of `buf` and return a copy of it.
+zim::Dirent read_from_buffer(const zim::Buffer& buf) {
+  const auto reader = std::make_shared<zim::BufferReader>(buf);
+  return *zim::DirentReader(reader).readDirent(zim::offset_t(0));
+}
+
+size_t writenDirentSize(const zim::writer::Dirent& dirent) {  // bytes dirent.write() puts in a scratch file
+  TempFile tmpFile("test_dirent");
+  const auto tmp_fd = tmpFile.fd();
+  dirent.write(tmp_fd);
+  const auto size = lseek(tmp_fd, 0, SEEK_END);  // end offset == file size; -1 signals failure
+  if (size < 0) throw std::runtime_error("lseek failed on temporary dirent file");  // do not let -1 wrap to SIZE_MAX
+  return static_cast<size_t>(size);
+}
+
+TEST(DirentTest, size)
+{
+#ifdef _WIN32
+  ASSERT_EQ(sizeof(zim::writer::Dirent), 72);
+#else
+  // The size of the writer Dirent matters: huge zim files are created on Linux
+  // and a lot of dirents have to be stored.
+  // Make sure no change accidentally grows the structure.
+#if ENV32BIT
+  // On 32-bit builds, Dirent is smaller.
+  ASSERT_EQ(sizeof(zim::writer::Dirent), 30);
+#else
+  ASSERT_EQ(sizeof(zim::writer::Dirent), 38);
+#endif  // ENV32BIT
+#endif  // _WIN32
+}
+
+TEST(DirentTest, set_get_data_dirent)
+{
+  zim::Dirent dirent;
+  dirent.setUrl('C', "Bar");
+  dirent.setItem(17, zim::cluster_index_t(45), zim::blob_index_t(1234));
+  dirent.setVersion(54346);
+  // Until a title is set explicitly, getTitle() is expected to mirror the URL.
+  ASSERT_FALSE(dirent.isRedirect());
+  ASSERT_EQ(dirent.getNamespace(), 'C');
+  ASSERT_EQ(dirent.getUrl(), "Bar");
+  ASSERT_EQ(dirent.getTitle(), "Bar");
+  ASSERT_EQ(dirent.getParameter(), "");
+  ASSERT_EQ(dirent.getBlobNumber().v, 1234U);
+  ASSERT_EQ(dirent.getVersion(), 54346U);
+  // Setting a title must change only the title.
+  dirent.setTitle("Foo");
+  ASSERT_EQ(dirent.getNamespace(), 'C');
+  ASSERT_EQ(dirent.getUrl(), "Bar");
+  ASSERT_EQ(dirent.getTitle(), "Foo");
+  ASSERT_EQ(dirent.getParameter(), "");
+}
+
+TEST(DirentTest, read_write_article_dirent)
+{
+  zim::writer::Dirent dirent(NS::C, "Bar", "Foo", 17);
+  zim::writer::Cluster cluster(zim::Compression::None);
+  cluster.addContent(""); // Add a dummy content
+  cluster.setClusterIndex(zim::cluster_index_t(45));
+  dirent.setCluster(&cluster);
+
+  ASSERT_TRUE(dirent.isItem());
+  ASSERT_EQ(dirent.getNamespace(), NS::C);
+  ASSERT_EQ(dirent.getPath(), "Bar");
+  ASSERT_EQ(dirent.getTitle(), "Foo");
+  ASSERT_EQ(dirent.getClusterNumber().v, 45U);
+  ASSERT_EQ(dirent.getBlobNumber().v, 1U);
+  ASSERT_EQ(dirent.getVersion(), 0U);
+
+  // Serialize the writer-side dirent, then parse it back with the reader-side class.
+  auto buffer = write_to_buffer(dirent);
+  zim::Dirent dirent2(read_from_buffer(buffer));
+  ASSERT_FALSE(dirent2.isRedirect());
+  ASSERT_EQ(dirent2.getNamespace(), 'C');
+  ASSERT_EQ(dirent2.getUrl(), "Bar");  // the path must survive the round trip as well
+  ASSERT_EQ(dirent2.getTitle(), "Foo");
+  ASSERT_EQ(dirent2.getParameter(), "");
+  ASSERT_EQ(dirent2.getClusterNumber().v, 45U);
+  ASSERT_EQ(dirent2.getBlobNumber().v, 1U);
+  ASSERT_EQ(dirent2.getVersion(), 0U);
+}
+
+TEST(DirentTest, read_write_article_dirent_unicode)
+{
+  const std::string name("L\xc3\xbcliang");  // non-ASCII path, spelled as raw UTF-8 bytes
+  zim::writer::Dirent dirent(NS::C, name, "", 17);
+  zim::writer::Cluster cluster(zim::Compression::None);
+  cluster.addContent(""); // Add a dummy content
+  cluster.setClusterIndex(zim::cluster_index_t(45));
+  dirent.setCluster(&cluster);
+  // The title was left empty, so getTitle() is expected to return the path.
+  ASSERT_TRUE(dirent.isItem());
+  ASSERT_EQ(dirent.getNamespace(), NS::C);
+  ASSERT_EQ(dirent.getPath(), name);
+  ASSERT_EQ(dirent.getTitle(), name);
+  ASSERT_EQ(dirent.getClusterNumber().v, 45U);
+  ASSERT_EQ(dirent.getBlobNumber().v, 1U);
+
+  const auto buffer = write_to_buffer(dirent);
+  zim::Dirent dirent2 = read_from_buffer(buffer);
+  ASSERT_FALSE(dirent2.isRedirect());
+  ASSERT_EQ(dirent2.getNamespace(), 'C');
+  ASSERT_EQ(dirent2.getUrl(), name);
+  ASSERT_EQ(dirent2.getTitle(), name);
+  ASSERT_EQ(dirent2.getParameter(), "");
+  ASSERT_EQ(dirent2.getClusterNumber().v, 45U);
+  ASSERT_EQ(dirent2.getBlobNumber().v, 1U);
+}
+
+TEST(DirentTest, read_write_redirect_dirent)
+{
+  zim::writer::Dirent targetDirent(NS::C, "Foo", "", 17);
+  targetDirent.setIdx(zim::entry_index_t(321));
+  zim::writer::Dirent dirent(NS::C, "Bar", "", NS::C, "Foo");
+  ASSERT_EQ(dirent.getRedirectNs(), NS::C);
+  ASSERT_EQ(dirent.getRedirectPath(), "Foo");
+  dirent.setRedirect(&targetDirent);
+  // Once resolved, the redirect reports the index of its target.
+  ASSERT_TRUE(dirent.isRedirect());
+  ASSERT_EQ(dirent.getNamespace(), NS::C);
+  ASSERT_EQ(dirent.getPath(), "Bar");
+  ASSERT_EQ(dirent.getRedirectIndex().v, 321U);
+
+  const auto buffer = write_to_buffer(dirent);
+  zim::Dirent dirent2 = read_from_buffer(buffer);
+  // The serialized form must come back as the same redirect.
+  ASSERT_TRUE(dirent2.isRedirect());
+  ASSERT_EQ(dirent2.getNamespace(), 'C');
+  ASSERT_EQ(dirent2.getUrl(), "Bar");
+  ASSERT_EQ(dirent2.getTitle(), "Bar");
+  ASSERT_EQ(dirent2.getRedirectIndex().v, 321U);
+}
+
+TEST(DirentTest, dirent_size)
+{
+  // Case 1: path set, title empty. The computed size must equal the bytes actually written.
+  const zim::writer::Dirent pathOnly(NS::C, "Bar", "", 17);
+  ASSERT_EQ(pathOnly.getDirentSize(), writenDirentSize(pathOnly));
+
+  // Case 2: path and title both set.
+  const zim::writer::Dirent pathAndTitle(NS::C, "Bar", "Foo", 17);
+  ASSERT_EQ(pathAndTitle.getDirentSize(), writenDirentSize(pathAndTitle));
+}
+
+TEST(DirentTest, redirect_dirent_size)
+{
+  zim::writer::Dirent target(NS::C, "Foo", "", 17);
+  target.setIdx(zim::entry_index_t(321));
+  zim::writer::Dirent redirect(NS::C, "Bar", "", NS::C, "Foo");
+  redirect.setRedirect(&target);
+  // For a resolved redirect too, the computed size must equal the bytes actually written.
+  ASSERT_EQ(redirect.getDirentSize(), writenDirentSize(redirect));
+}
+
+}  // namespace
diff --git a/test/dirent_lookup.cpp b/test/dirent_lookup.cpp
new file mode 100644
index 0000000..f3a11ea
--- /dev/null
+++ b/test/dirent_lookup.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "../src/dirent_lookup.h"
+#include "../src/_dirent.h"
+#include <zim/zim.h>
+
+#include "gtest/gtest.h"
+
+#include <vector>
+#include <string>
+#include <utility>
+
+namespace
+{
+
+const std::vector<std::pair<char, std::string>> articleurl = {  // (namespace, url) rows; trailing number = row index
+  {'A', "aa"},       //0
+  {'A', "aaaa"},     //1
+  {'A', "aaaaaa"},   //2
+  {'A', "aaaabb"},   //3
+  {'A', "aaaacc"},   //4
+  {'A', "aabbaa"},   //5
+  {'A', "aabbbb"},   //6
+  {'A', "aabbcc"},   //7
+  {'A', "cccccc"},   //8
+  {'M', "foo"},      //9
+  {'a', "aa"},       //10
+  {'a', "bb"},       //11
+  {'b', "aa"}        //12
+};
+
+struct GetDirentMock
+{ // In-memory dirent accessor backed by the articleurl table.
+  using DirentAccessorType = GetDirentMock;
+  using index_t = zim::entry_index_t;
+  static const std::string& getDirentKey(const zim::Dirent& d) {
+    return d.getUrl();
+  }
+  // Number of rows in the table.
+  zim::entry_index_t getDirentCount() const {
+    return zim::entry_index_t(articleurl.size());
+  }
+  // Builds a fresh Dirent from the idx-th row; at() throws on an out-of-range index.
+  std::shared_ptr<const zim::Dirent> getDirent(zim::entry_index_t idx) const {
+    const auto& row = articleurl.at(idx.v);
+    const auto dirent = std::make_shared<zim::Dirent>();
+    dirent->setUrl(row.first, row.second);
+    return dirent;
+  }
+};
+
+class NamespaceBoundaryTest : public ::testing::Test
+{
+  protected:  // shared by every TEST_F of this fixture
+    GetDirentMock dirents;
+};
+
+TEST_F(NamespaceBoundaryTest, BeginOffset)
+{
+  // Each pair is {namespace, expected index of the first dirent not below that namespace}.
+  const std::vector<std::pair<char, unsigned>> expectations = {
+    {'a', 10}, {'b', 12}, {'c', 13}, {'A'-1, 0}, {'A', 0}, {'M', 9}, {'U', 10}
+  };
+  for (const auto& e : expectations) {
+    ASSERT_EQ(zim::getNamespaceBeginOffset(dirents, e.first).v, e.second) << "ns: " << e.first;
+  }
+}
+
+TEST_F(NamespaceBoundaryTest, EndOffset)
+{
+  // Each pair is {namespace, expected index one past the last dirent not above that namespace}.
+  const std::vector<std::pair<char, unsigned>> expectations = {
+    {'a', 12}, {'b', 13}, {'c', 13}, {'A'-1, 0}, {'A', 9}, {'M', 10}, {'U', 10}
+  };
+  for (const auto& e : expectations) {
+    ASSERT_EQ(zim::getNamespaceEndOffset(dirents, e.first).v, e.second) << "ns: " << e.first;
+  }
+}
+
+TEST_F(NamespaceBoundaryTest, EndEqualsStartOfNext)
+{ // For every namespace char in [32, 126], its end offset must equal the begin offset of the next char.
+  for (char ns = 32; ns < 127; ++ns) {
+    std::cout << "ns: " << ns << "|" << static_cast<int>(ns) << std::endl;
+    ASSERT_EQ(zim::getNamespaceEndOffset(dirents, ns).v, zim::getNamespaceBeginOffset(dirents, ns+1).v);
+  }
+}
+
+
+class DirentLookupTest : public ::testing::Test
+{
+  protected:  // shared by every TEST_F of this fixture
+    GetDirentMock dirents;
+};
+
+using DirentLookupType = zim::DirentLookup<GetDirentMock>;
+
+// Test-only subclass that re-exports protected members of DirentLookup as public.
+struct UnprotectedDirentLookup : DirentLookupType
+{
+  // Forward any constructor arguments unchanged to the real lookup.
+  template<typename... Args>
+  UnprotectedDirentLookup(const Args&... args) : DirentLookupType(args...) {}
+
+  using DirentLookupType::compareWithDirentAt;
+};
+
+
+TEST_F(DirentLookupTest, compareWithDirentAt)
+{
+  UnprotectedDirentLookup direntLookup(&dirents);
+
+  // Dirent at index 9 is {'M', "foo"}. Keys that differ must compare strictly
+  // less/greater, so a comparator that always answers 0 cannot pass.
+  EXPECT_LT(direntLookup.compareWithDirentAt('A', "foo", 9), 0);
+  EXPECT_LT(direntLookup.compareWithDirentAt('M', "boo", 9), 0);
+  EXPECT_EQ(direntLookup.compareWithDirentAt('M', "foo", 9), 0);
+  EXPECT_GT(direntLookup.compareWithDirentAt('M', "for", 9), 0);
+  EXPECT_GT(direntLookup.compareWithDirentAt('N', "foo", 9), 0);
+}
+
+
+#define CHECK_FIND_RESULT(expr, is_exact_match, expected_value) \
+  { /* expr is evaluated once; .first is compared to is_exact_match, .second.v to expected_value */ \
+    const auto findResult = expr; \
+    ASSERT_EQ(findResult.first, is_exact_match); \
+    ASSERT_EQ(findResult.second.v, expected_value); \
+  }
+
+TEST_F(DirentLookupTest, ExactMatch)
+{
+  zim::DirentLookup<GetDirentMock> direntLookup(&dirents);
+  zim::FastDirentLookup<GetDirentMock> fast_direntLookup(&dirents, 4);
+  // `fast_##expr` pastes the prefix onto the first token of expr, so each check also runs on fast_direntLookup.
+#define CHECK_EXACT_MATCH(expr, expected_value)         \
+  CHECK_FIND_RESULT(expr,        true, expected_value); \
+  CHECK_FIND_RESULT(fast_##expr, true, expected_value);
+
+  CHECK_EXACT_MATCH(direntLookup.find('A', "aa"), 0);
+  CHECK_EXACT_MATCH(direntLookup.find('a', "aa"), 10);
+  CHECK_EXACT_MATCH(direntLookup.find('A', "aabbbb"), 6);
+  CHECK_EXACT_MATCH(direntLookup.find('b', "aa"), 12);
+
+#undef CHECK_EXACT_MATCH
+}
+
+
+TEST_F(DirentLookupTest, NoExactMatch)
+{
+  zim::DirentLookup<GetDirentMock> direntLookup(&dirents);
+  zim::FastDirentLookup<GetDirentMock> fast_direntLookup(&dirents, 4);
+  // `fast_##expr` pastes the prefix onto the first token of expr, so each check also runs on fast_direntLookup.
+#define CHECK_NOEXACT_MATCH(expr, expected_value)        \
+  CHECK_FIND_RESULT(expr,        false, expected_value); \
+  CHECK_FIND_RESULT(fast_##expr, false, expected_value);
+
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "ABC"), 0); // ABC is expected before aa (0) => 0
+  CHECK_NOEXACT_MATCH(direntLookup.find('U', "aa"), 10); // No U namespace => return 10 (the index of the first item from the next namespace)
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "aabb"), 5); // aabb is between aaaacc (4) and aabbaa (5) => 5
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "aabbb"), 6); // aabbb is between aabbaa (5) and aabbbb (6) => 6
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "aabbbc"), 7); // aabbbc is between aabbbb (6) and aabbcc (7) => 7
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "bb"), 8); // bb is between aabbcc (7) and cccccc (8) => 8
+  CHECK_NOEXACT_MATCH(direntLookup.find('A', "dd"), 9); // dd is after cccccc (8) => 9
+  CHECK_NOEXACT_MATCH(direntLookup.find('M', "f"), 9); // f is before foo (9) => 9
+  CHECK_NOEXACT_MATCH(direntLookup.find('M', "bar"), 9); // bar is before foo (9) => 9
+  CHECK_NOEXACT_MATCH(direntLookup.find('M', "foo1"), 10); // foo1 is after foo (9) => 10
+  CHECK_NOEXACT_MATCH(direntLookup.find('z', "zz"), 13); // z is after the last namespace => 13 (the dirent count)
+
+#undef CHECK_NOEXACT_MATCH
+}
+
+
+}  // namespace
diff --git a/test/find.cpp b/test/find.cpp
new file mode 100644
index 0000000..68c6f31
--- /dev/null
+++ b/test/find.cpp
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2009 Miguel Rocha
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/zim.h>
+#include <zim/archive.h>
+#include <zim/error.h>
+
+#include "tools.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+// Not found cases
+
+
+using zim::unittests::getDataFilePath;
+using zim::unittests::TempZimArchive;
+using zim::unittests::TestItem;
+
+// ByTitle
+#if WITH_TEST_DATA
+TEST(FindTests, NotFoundByTitle)
+{
+    // Neither an unknown title nor a path-like string may match any title.
+    for (auto& testfile : getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) {
+        zim::Archive archive(testfile.path);
+        auto unknownRange = archive.findByTitle("unkownTitle");
+        auto pathLikeRange = archive.findByTitle("j/body.js");
+        ASSERT_EQ(unknownRange.begin(), unknownRange.end());
+        ASSERT_EQ(pathLikeRange.begin(), pathLikeRange.end());
+    }
+}
+
+// By Path
+TEST(FindTests, NotFoundByPath)
+{
+    for (auto& testfile : getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) {
+        zim::Archive archive(testfile.path);
+        const char* const missingPaths[] = {  // every one of these must yield an empty range
+            "unkwonUrl",
+            "U/unkwonUrl",
+            "A/unkwonUrl",
+            "X",
+            "X/"
+        };
+        for (const char* path : missingPaths) {
+            auto range = archive.findByPath(path);
+            ASSERT_EQ(range.begin(), range.end());
+        }
+    }
+}
+
+// Found cases
+
+// ByTitle
+TEST(FindTests, ByTitle)
+{
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) {
+        zim::Archive archive (testfile.path);
+
+        auto range0 = archive.findByTitle("Першая старонка");
+
+        auto count = 0;
+        for(auto& entry: range0) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Першая старонка"), 0);
+        }
+        if (testfile.category == "withns") {
+          // On the withns test file, there are two entries with this title:
+          //  the entry itself and the index.html (a redirection)
+          ASSERT_EQ(count, 2);
+        } else {
+          // On the new test file, the main page redirection is stored in the `W` namespace,
+          // so findByTitle finds only 1 entry in the `C` namespace.
+          ASSERT_EQ(count, 1);
+        }
+
+        auto range1 = archive.findByTitle("Украінская");
+
+        count = 0;
+        for(auto& entry: range1) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Украінская"), 0);
+        }
+        ASSERT_EQ(count, 5);
+
+        // Offset from end
+        auto range2 = archive.findByTitle("Украінская");
+        range2 = range2.offset(0, 2);
+        count = 0;
+        for(auto& entry: range2) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Украінская"), 0);
+        }
+        ASSERT_EQ(count, 2);
+
+        // Offset from start
+        auto range3 = archive.findByTitle("Украінская");
+        range3 = range3.offset(1, 4);
+        count = 0;
+        for(auto& entry: range3) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Украінская"), 0);
+        }
+        ASSERT_EQ(count, 4);
+
+        // Offset with max results greater than the number of results
+        auto range4 = archive.findByTitle("Украінская");
+        range4 = range4.offset(0, 10);
+        count = 0;
+        for(auto& entry: range4) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Украінская"), 0);
+        }
+        ASSERT_EQ(count, 5);
+
+        // Offset with start greater than the number of results
+        auto range5 = archive.findByTitle("Украінская");
+        range5 = range5.offset(10, 5);
+        count = 0;
+        for(auto& entry: range5) {
+          count++;
+          ASSERT_EQ(entry.getTitle().find("Украінская"), 0);
+        }
+        ASSERT_EQ(count, 0);
+    }
+}
+
+#define CHECK_FIND_TITLE_COUNT(prefix, expected_count) \
+{ /* uses `archive` from the enclosing scope; every hit must start with prefix, and hits are counted */ \
+  auto count = 0; \
+  auto range = archive.findByTitle(prefix); \
+  for(auto& entry: range) { \
+    count++; \
+    ASSERT_EQ(entry.getTitle().find(prefix), 0); \
+  } \
+  ASSERT_EQ(count, expected_count); \
+}
+
+TEST(FindTests, ByTitleWithDuplicate)
+{
+  TempZimArchive tza("testZim");
+  zim::writer::Creator creator;
+  creator.startZimCreation(tza.getPath());
+  creator.addItem(std::make_shared<TestItem>("article0", "text/html", "AAA", ""));
+  creator.addItem(std::make_shared<TestItem>("article1", "text/html", "BB", ""));
+  creator.addItem(std::make_shared<TestItem>("article2", "text/html", "BBB", ""));
+  creator.addItem(std::make_shared<TestItem>("article3", "text/html", "BBB", ""));
+  creator.addItem(std::make_shared<TestItem>("article4", "text/html", "BBBB", ""));
+  creator.addItem(std::make_shared<TestItem>("article5", "text/html", "CCC", ""));
+  creator.addItem(std::make_shared<TestItem>("article6", "text/html", "CCC", ""));
+  creator.finishZimCreation();
+
+  zim::Archive archive(tza.getPath());
+  // The first binary search step will look at index 3 (0+6/2), which is a BBB,
+  // but we want to be sure it returns article2, which is the start of the range "BBB*"
+  CHECK_FIND_TITLE_COUNT("BBB", 3)
+  CHECK_FIND_TITLE_COUNT("BB", 4)
+  CHECK_FIND_TITLE_COUNT("BBBB", 1)
+  CHECK_FIND_TITLE_COUNT("CCC", 2)
+  CHECK_FIND_TITLE_COUNT("C", 2)
+}
+
+
+// By Path
+TEST(FindTests, ByPath)
+{
+  for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "withns")) {
+    zim::Archive archive (testfile.path);
+
+    auto range0 = archive.findByPath("A/Main_Page.html");
+    auto range1 = archive.findByPath("I/s/");
+    auto range2 = archive.findByPath("-/j/head.js");
+    auto range3 = archive.findByPath("I");
+    auto range4 = archive.findByPath("I/");
+    auto range5 = archive.findByPath("");
+    auto range6 = archive.findByPath("/");
+    // The indexes and counts below are specific to the "withns" flavour of the test file.
+    ASSERT_EQ(range0.begin()->getIndex(), 5);
+    auto count = 0;
+    for(auto& entry: range0) {
+      count++;
+      ASSERT_EQ(entry.getPath().find("A/Main_Page.html"), 0);
+    }
+    ASSERT_EQ(count, 1);
+
+    ASSERT_EQ(range1.begin()->getIndex(), 78);
+    count = 0;
+    for(auto& entry: range1) {
+      count++;
+      std::cout << entry.getPath() << std::endl;
+      ASSERT_EQ(entry.getPath().find("I/s/"), 0);
+    }
+    ASSERT_EQ(count, 31);
+
+    ASSERT_EQ(range2.begin()->getIndex(), 2);
+    count = 0;
+    for(auto& entry: range2) {
+      count++;
+      ASSERT_EQ(entry.getPath().find("-/j/head.js"), 0);
+    }
+    ASSERT_EQ(count, 1);
+
+    ASSERT_EQ(range3.begin()->getIndex(), 75);
+    count = 0;
+    for(auto& entry: range3) {
+      count++;
+      std::cout << entry.getPath() << std::endl;
+      ASSERT_EQ(entry.getPath().find("I"), 0);
+    }
+    ASSERT_EQ(count, 34);
+
+    ASSERT_EQ(range4.begin()->getIndex(), 75);
+    count = 0;
+    for(auto& entry: range4) {
+      count++;
+      std::cout << entry.getPath() << std::endl;
+      ASSERT_EQ(entry.getPath().find("I/"), 0);
+    }
+    ASSERT_EQ(count, 34);
+    // An empty path and "/" are both expected to enumerate every entry, in index order.
+    count = 0;
+    for(auto& entry: range5) {
+      ASSERT_EQ(count, entry.getIndex());
+      count++;
+    }
+    ASSERT_EQ(count, 118);
+
+    count = 0;
+    for(auto& entry: range6) {
+      ASSERT_EQ(count, entry.getIndex());
+      count++;
+    }
+    ASSERT_EQ(count, 118);
+  }
+}
+
+// By Path
+TEST(FindTests, ByPathNons)
+{
+  for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "nons")) {
+    zim::Archive archive (testfile.path);
+    // NOTE(review): Cyrillic literals restored from mis-encoded text; the one-letter prefix is assumed to be "П" - confirm.
+    auto range0 = archive.findByPath("Першая_старонка.html");
+    auto range1 = archive.findByPath("П");
+    auto range2 = archive.findByPath("");
+    auto range3 = archive.findByPath("/");
+
+    auto count = 0;
+    for(auto& entry: range0) {
+      count++;
+      ASSERT_EQ(entry.getPath().find("Першая_старонка.html"), 0);
+    }
+    ASSERT_EQ(count, 1);
+
+    count = 0;
+    for(auto& entry: range1) {
+      count++;
+      std::cout << entry.getPath() << std::endl;
+      ASSERT_EQ(entry.getPath().find("П"), 0);
+    }
+    ASSERT_EQ(count, 2);
+
+    count = 0;
+    for(auto& entry: range2) {
+      ASSERT_EQ(count, entry.getIndex());
+      count++;
+    }
+    ASSERT_EQ(count, 109);
+
+    count = 0;
+    for(auto& entry: range3) {
+      ASSERT_EQ(count, entry.getIndex());
+      count++;
+    }
+    ASSERT_EQ(count, 109);
+  }
+}
+#endif
+
+} // namespace
diff --git a/test/header.cpp b/test/header.cpp
new file mode 100644
index 0000000..489edcd
--- /dev/null
+++ b/test/header.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <stdexcept>
+#ifdef _WIN32
+#include <windows.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <io.h>
+#include <fileapi.h>
+#endif
+
+#include <iostream>
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+#include "../src/fileheader.h"
+#include "../src/buffer.h"
+#include "../src/buffer_reader.h"
+
+#include "tools.h"
+
+namespace
+{
+
+using zim::unittests::TempFile;
+using zim::unittests::write_to_buffer;
+
+TEST(HeaderTest, read_write_header)
+{
+  zim::Fileheader header;
+  header.setUuid("123456789\0abcd\nf");
+  header.setArticleCount(4711);
+  header.setUrlPtrPos(12345);
+  header.setTitleIdxPos(23456);
+  header.setClusterCount(14);
+  header.setClusterPtrPos(45678);
+  header.setMainPage(11);
+  header.setLayoutPage(13);
+  header.setMimeListPos(72);
+
+  ASSERT_EQ(header.getUuid(), "123456789\0abcd\nf");
+  ASSERT_EQ(header.getArticleCount(), 4711U);
+  ASSERT_EQ(header.getUrlPtrPos(), 12345U);
+  ASSERT_EQ(header.getTitleIdxPos(), 23456U);
+  ASSERT_EQ(header.getClusterCount(), 14U);
+  ASSERT_EQ(header.getClusterPtrPos(), 45678U);
+  ASSERT_EQ(header.getMainPage(), 11U);
+  ASSERT_EQ(header.getLayoutPage(), 13U);
+  ASSERT_EQ(header.getMimeListPos(), 72U);
+
+  // Serialize, re-read into a fresh header, and check every field set above.
+  auto buffer = write_to_buffer(header);
+  zim::Fileheader header2;
+  header2.read(zim::BufferReader(buffer));
+  ASSERT_EQ(header2.getUuid(), "123456789\0abcd\nf");
+  ASSERT_EQ(header2.getArticleCount(), 4711U);
+  ASSERT_EQ(header2.getUrlPtrPos(), 12345U);
+  ASSERT_EQ(header2.getTitleIdxPos(), 23456U);
+  ASSERT_EQ(header2.getClusterCount(), 14U);
+  ASSERT_EQ(header2.getClusterPtrPos(), 45678U);
+  ASSERT_EQ(header2.getMainPage(), 11U);
+  ASSERT_EQ(header2.getLayoutPage(), 13U);
+  ASSERT_EQ(header2.getMimeListPos(), 72U);  // previously the only field not verified after the round trip
+}
+
+}  // namespace
diff --git a/test/indexing_criteria.cpp b/test/indexing_criteria.cpp
new file mode 100644
index 0000000..163046e
--- /dev/null
+++ b/test/indexing_criteria.cpp
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/item.h>
+#include <zim/search.h>
+#include <zim/suggestion.h>
+#include <zim/writer/item.h>
+
+#include "tools.h"
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using zim::unittests::TempZimArchive;
+using zim::unittests::TestItem;
+using zim::unittests::IsFrontArticle;
+
+
+class TestIndexData : public zim::writer::IndexData {
+  public:
+    TestIndexData(const std::string& content)  // text later returned by getContent()
+      : m_text(content)
+    {}
+
+    bool hasIndexData() const { return m_text.size() != 0; }  // empty text => no index data
+    std::string getTitle() const { return std::string(); }
+    std::string getContent() const { return m_text; }
+    std::string getKeywords() const { return std::string(); }
+    uint32_t getWordCount() const { return 1; }  // constant, independent of the text
+    IndexData::GeoPosition getGeoPosition() const { return std::make_tuple(false, 0.0, 0.0); }
+
+  private:
+    std::string m_text;
+};
+
+class IndexDataItem : public TestItem {  // TestItem whose getIndexData() result is injected by the test
+  public:
+    IndexDataItem(const std::string& path, const std::string& mimetype, const std::string& title, const std::string& content, std::shared_ptr<zim::writer::IndexData> indexData)
+    : TestItem(path, mimetype, title, content),
+      m_injectedIndexData(indexData)  // may be nullptr
+    {}
+
+    std::shared_ptr<zim::writer::IndexData> getIndexData() const { return m_injectedIndexData; }
+  private:
+    std::shared_ptr<zim::writer::IndexData> m_injectedIndexData;
+};
+
+#if defined(ENABLE_XAPIAN)
+
+TEST(IndexCriteria, defaultIndexingBaseOnMimeType)
+{
+  TempZimArchive tempZim("testZim");
+  zim::writer::Creator writer;
+  writer.configIndexing(true, "en");
+  writer.startZimCreation(tempZim.getPath());
+
+  writer.addItem(  // html item: the one expected in the search results
+    std::make_shared<TestItem>("HtmlTestPath", "text/html", "Test Article", "This is a test article")
+  );
+
+  writer.addItem(  // same title and content, but text/plain
+    std::make_shared<TestItem>("OtherTestPath", "text/plain", "Test Article", "This is a test article")
+  );
+  writer.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  zim::Searcher fulltext(archive);
+  zim::Query wanted("test article");
+  auto lookup = fulltext.search(wanted);
+
+  ASSERT_EQ(1, lookup.getEstimatedMatches());
+  auto hits = lookup.getResults(0, archive.getEntryCount());
+  auto hit = hits.begin();
+  ASSERT_EQ(hit.getPath(), "HtmlTestPath");
+  ASSERT_EQ(++hit, hits.end());
+}
+
+TEST(IndexCriteria, specificIndexData)
+{
+  TempZimArchive tempZim("testZim");
+  zim::writer::Creator writer;
+  writer.configIndexing(true, "en");
+  writer.startZimCreation(tempZim.getPath());
+
+  // By default, html content is indexed.
+  writer.addItem(
+    std::make_shared<TestItem>("HtmlTestPath", "text/html", "Test Article", "This is a test article")
+  );
+
+  // By default, non-html content is not indexed.
+  writer.addItem(
+    std::make_shared<TestItem>("OtherTestPath", "text/plain", "Test Article", "This is a test article")
+  );
+
+  // An item returning no IndexData at all is not indexed.
+  writer.addItem(
+    std::make_shared<IndexDataItem>("HtmlTestPathNull", "text/html", "Test Article", "This is a test article", nullptr)
+  );
+
+  // An item whose IndexData reports no data is not indexed either.
+  writer.addItem(
+    std::make_shared<IndexDataItem>("HtmlTestPathNodata", "text/html", "Test Article", "This is a test article",
+      std::make_shared<TestIndexData>(""))
+  );
+
+  // When IndexData is provided, its content is what gets indexed.
+  writer.addItem(
+    std::make_shared<IndexDataItem>("OtherTestPathWithIndex", "text/plain", "Test Article", "This is content",
+      std::make_shared<TestIndexData>("test article"))
+  );
+  writer.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  zim::Searcher fulltext(archive);
+  zim::Query wanted("test article");
+  auto lookup = fulltext.search(wanted);
+
+  ASSERT_EQ(2, lookup.getEstimatedMatches());
+  auto hits = lookup.getResults(0, archive.getEntryCount());
+  auto hit = hits.begin();
+  ASSERT_EQ(hit.getPath(), "HtmlTestPath");
+  hit++;
+  ASSERT_EQ(hit.getPath(), "OtherTestPathWithIndex");
+  ASSERT_EQ(++hit, hits.end());
+}
+
+#endif // ENABLE_XAPIAN
+
+TEST(IndexCriteria, suggestion) {
+  TempZimArchive tza("testZim");
+  zim::writer::Creator creator;
+
+  creator.startZimCreation(tza.getPath());
+
+  // By default, html content is title indexed
+  creator.addItem(
+    std::make_shared<TestItem>("HtmlTestPath", "text/html", "Test Article", "This is a test article")
+  );
+
+  // By default, non-html content is not title indexed
+  creator.addItem(
+    std::make_shared<TestItem>("OtherTestPath", "text/plain", "Test Article", "This is a test article")
+  );
+
+  // By default, a redirection is not indexed (even if pointing to html content)
+  creator.addRedirection("Aredirect", "Test Article Redirection", "HtmlTestPath");
+
+  // We can force html content to not be title indexed
+  creator.addItem(
+    std::make_shared<TestItem>("HtmlTestPathForced", "text/html", "Test Article", "This is a test article", IsFrontArticle::NO)
+  );
+
+  // We can force non-html content to be title indexed
+  creator.addItem(
+    std::make_shared<TestItem>("OtherTestPathForced", "text/plain", "Test Article", "This is a test article", IsFrontArticle::YES)
+  );
+
+  // The forced redirection needs to point to something not already indexed.
+  // As suggestions are collapsed by target path, a redirection to an indexed entry
+  // would leave only one of the two in the suggestion results.
+  creator.addRedirection("AredirectForced", "Test Article Redirection", "OtherTestPath", {{zim::writer::FRONT_ARTICLE, 1}});
+
+  creator.finishZimCreation();
+
+  zim::Archive archive(tza.getPath());
+
+  zim::SuggestionSearcher suggestionSearcher(archive);
+  auto suggestion = suggestionSearcher.suggest("Test Article");
+
+  ASSERT_EQ(3, suggestion.getEstimatedMatches());  // default html + the two forced front articles
+  auto result = suggestion.getResults(0, archive.getEntryCount());
+  auto begin = result.begin();
+  ASSERT_EQ(begin->getPath(), "HtmlTestPath");
+  begin++;
+  ASSERT_EQ(begin->getPath(), "OtherTestPathForced");
+  begin++;
+  ASSERT_EQ(begin->getPath(), "AredirectForced");
+  ASSERT_EQ(++begin, result.end());
+}
+
+} // unnamed namespace
diff --git a/test/istreamreader.cpp b/test/istreamreader.cpp
new file mode 100644
index 0000000..2a913c6
--- /dev/null
+++ b/test/istreamreader.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "istreamreader.h"
+#include "endian_tools.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using namespace zim;
+
+////////////////////////////////////////////////////////////////////////////////
+// IDataStream
+////////////////////////////////////////////////////////////////////////////////
+
+// Implement the IStreamReader interface in the simplest way
+class InfiniteZeroStream : public IStreamReader  // stream that yields zero bytes for every request
+{
+  void readImpl(char* buf, zim::zsize_t nbytes) { memset(buf, 0, nbytes.v); }  // zero-fill all nbytes.v bytes of buf
+};
+
+// ... and test that it compiles and works as intended
+
+TEST(IStreamReader, read)
+{
+  InfiniteZeroStream zeros;
+  IStreamReader& stream = zeros;  // exercise the public interface of the base class
+  EXPECT_EQ(0, stream.read<int>());
+  EXPECT_EQ(0L, stream.read<long>());
+
+  // read<double>() is deliberately not tested:
+  // zim::fromLittleEndian() handles only integer types.
+}
+
+TEST(IStreamReader, sub_reader)
+{
+  const size_t kSize = 16;
+  const char expectedZeros[kSize] = {};  // all elements zero-initialized
+  InfiniteZeroStream zeros;
+  IStreamReader& stream = zeros;
+  auto sub = stream.sub_reader(zim::zsize_t(kSize));
+  EXPECT_EQ(sub->size().v, kSize);
+  auto bytes = sub->get_buffer(zim::offset_t(0), zim::zsize_t(kSize));
+  EXPECT_EQ(bytes.size().v, kSize);
+  EXPECT_EQ(0, memcmp(bytes.data(), expectedZeros, kSize));
+}
+
+} // unnamed namespace
diff --git a/test/iterator.cpp b/test/iterator.cpp
new file mode 100644
index 0000000..82064ec
--- /dev/null
+++ b/test/iterator.cpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2009 Miguel Rocha
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/zim.h>
+#include <zim/archive.h>
+#include <zim/error.h>
+#include <zim/item.h>
+
+#include "tools.h"
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using zim::unittests::getDataFilePath;
+
+#if WITH_TEST_DATA
+TEST(ClusterIteratorTest, getEntryByClusterOrder)
+{
+    const std::vector<zim::entry_index_type> clusterOrder = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 109, 110, 111, 112, 113, 114, 115, 116,
+117, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
+95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108 };
+
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "withns")) {
+        zim::Archive archive (testfile.path);
+
+        auto entryCount = archive.getEntryCount();
+
+        ASSERT_EQ(entryCount, clusterOrder.size());  // guarantees the subscript below stays in range
+
+        for (auto pos = 0u; pos < entryCount; pos++)
+        {
+            EXPECT_EQ(archive.getEntryByClusterOrder(pos).getIndex(), clusterOrder[pos]);
+        }
+    }
+}
+
+TEST(getEntry, indexOutOfRange)
+{
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "withns")) {
+        zim::Archive archive (testfile.path);
+
+        auto nbEntries = archive.getEntryCount();  // first index past the last valid entry
+
+        try {
+            archive.getEntryByPath(nbEntries);
+            FAIL() << "Should throw exception\n";  // reached only if nothing was thrown
+        }  catch (const std::out_of_range& e) {
+            ASSERT_EQ(e.what(), std::string("entry index out of range"));
+        }  catch(...) {
+            FAIL() << "Should throw std::out_of_range, but another exception type was thrown\n";
+        }
+    }
+}
+
+// Efficient order (archive.iterEfficient())
+TEST(IteratorTests, begin)
+{
+    std::vector<zim::entry_index_type> expected = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
+43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 109, 110, 111, 112, 113, 114, 115, 116,
+117, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
+95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108 };
+
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "withns")) {
+        zim::Archive archive (testfile.path);
+
+        int i = 0;  // NOTE(review): i is never checked against expected.size() in this test
+        for(auto& entry: archive.iterEfficient()) {
+            EXPECT_EQ(entry.getIndex(), expected[i]);  // index of the i-th entry yielded by the iteration
+            i++;
+        }
+    }
+}
+
+
+// ByTitle
+TEST(IteratorTests, beginByTitle)
+{
+    // The first ten entries in title order depend on the flavour of the test archive,
+    // so the expected indexes are selected per file inside the loop.
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) {
+        zim::Archive archive (testfile.path);
+        std::vector<zim::entry_index_type> expected;
+        if (testfile.category == "withns") {
+          expected = { 5, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+        } else {
+          expected = { 41, 42, 43, 44, 45, 46, 47, 48, 49, 50};
+        }
+
+        auto it = archive.iterByTitle().begin();
+
+        int i = 0;
+        while (i < 10)  // both expectation lists hold exactly ten indexes
+        {
+            EXPECT_EQ(it->getIndex(), expected[i]);
+            it++; i++;
+        }
+        std::cout << "\n";
+    }
+}
+
+
+// ByUrl
+TEST(IteratorTests, beginByPath)
+{
+    const std::vector<zim::entry_index_type> expectedIndexes = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+    for(auto& testfile:getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", "withns")) {
+        zim::Archive archive (testfile.path);
+
+        // Walk the first ten entries in path order and compare each entry
+        // index with the expected value for that position.
+        auto cursor = archive.iterByPath().begin();
+        for (int pos = 0; pos < 10; cursor++, pos++)
+        {
+            EXPECT_EQ(cursor->getIndex(), expectedIndexes[pos]);
+        }
+    }
+}
+
+TEST(IteratorTests, iteratorFunctions)
+{
+    for(auto& testfile:getDataFilePath("wikipedia_en_climate_change_nopic_2020-01.zim")) {
+        const zim::Archive archive(testfile.path);
+        ASSERT_TRUE(archive.hasTitleIndex());
+        const auto mainItem = archive.getMainEntry().getItem(true);
+        auto range = archive.findByTitle(mainItem.getTitle());
+        ASSERT_EQ(range.size(), 1);  // exactly one entry carries the main item's title
+        auto it1 = range.begin();
+        ASSERT_EQ(it1->getTitle(), mainItem.getTitle());
+
+        auto it2 = range.begin();
+        it2 = it1;   // test the copy-assignment operator
+        ASSERT_EQ(it2->getTitle(), mainItem.getTitle());
+
+        it1++;  // post-increment moves to the end of the one-element range
+        ASSERT_EQ(it1, range.end());
+        ASSERT_NO_THROW(it1->getTitle());  // NOTE(review): dereferences range.end(); confirm this is intended
+
+        it1--;  // post-decrement moves back to the only element
+        ASSERT_EQ(it1->getTitle(), mainItem.getTitle());
+    }
+}
+
+#endif
+
+} // namespace
diff --git a/test/lrucache.cpp b/test/lrucache.cpp
new file mode 100644
index 0000000..6416d09
--- /dev/null
+++ b/test/lrucache.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2014, lamerman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of lamerman nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "lrucache.h"
+#include "concurrent_cache.h"
+#include "gtest/gtest.h"
+
+const int NUM_OF_TEST1_RECORDS = 100;  // not referenced by any test in this file
+const int NUM_OF_TEST2_RECORDS = 100;  // number of keys inserted by KeepsAllValuesWithinCapacity
+const int TEST2_CACHE_CAPACITY = 50;   // capacity of the cache used by KeepsAllValuesWithinCapacity
+
+TEST(CacheTest, SimplePut) {
+    zim::lru_cache<int, int> cache(1);  // capacity of one entry
+    cache.put(7, 777);
+    EXPECT_TRUE(cache.exists(7));
+    EXPECT_EQ(777, cache.get(7));
+    EXPECT_EQ(1, cache.size());
+}
+
+TEST(CacheTest, OverwritingPut) {
+    zim::lru_cache<int, int> cache(1);
+    cache.put(7, 777);
+    cache.put(7, 222);  // same key again: the value is replaced, the size stays at one
+    EXPECT_TRUE(cache.exists(7));
+    EXPECT_EQ(222, cache.get(7));
+    EXPECT_EQ(1, cache.size());
+}
+
+TEST(CacheTest, MissingValue) {
+    zim::lru_cache<int, int> cache(1);  // nothing is ever inserted
+    EXPECT_TRUE(cache.get(7).miss());
+    EXPECT_FALSE(cache.get(7).hit());
+    EXPECT_THROW(cache.get(7).value(), std::range_error);
+}
+
+TEST(CacheTest, DropValue) {
+    zim::lru_cache<int, int> cache(3);
+    cache.put(7, 777);
+    cache.put(8, 888);
+    cache.put(9, 999);
+    EXPECT_EQ(3, cache.size());
+    EXPECT_TRUE(cache.exists(7));
+    EXPECT_EQ(777, cache.get(7));
+
+    EXPECT_TRUE(cache.drop(7));  // first drop removes the entry and reports true
+
+    EXPECT_EQ(2, cache.size());
+    EXPECT_FALSE(cache.exists(7));
+    EXPECT_THROW(cache.get(7).value(), std::range_error);
+
+    EXPECT_FALSE(cache.drop(7));  // second drop finds nothing and reports false
+}
+
+TEST(CacheTest1, KeepsAllValuesWithinCapacity) {
+    zim::lru_cache<int, int> cache(TEST2_CACHE_CAPACITY);
+    const int firstKept = NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY;  // keys below this are expected to be evicted
+    for (int key = 0; key < NUM_OF_TEST2_RECORDS; ++key) {
+        cache.put(key, key);
+    }
+
+    for (int key = 0; key < firstKept; ++key) {
+        EXPECT_FALSE(cache.exists(key));
+    }
+
+    for (int key = firstKept; key < NUM_OF_TEST2_RECORDS; ++key) {
+        EXPECT_TRUE(cache.exists(key));
+        EXPECT_EQ(key, cache.get(key));
+    }
+
+    size_t finalSize = cache.size();
+    EXPECT_EQ(TEST2_CACHE_CAPACITY, finalSize);
+}
+
+TEST(ConcurrentCacheTest, handleException) {
+    zim::ConcurrentCache<int, int> cache(1);
+    auto fetched = cache.getOrPut(7, [] { return 777; });
+    EXPECT_EQ(fetched, 777);
+    EXPECT_THROW(cache.getOrPut(8, [] { throw std::runtime_error("oups"); return 0; }), std::runtime_error);
+    fetched = cache.getOrPut(8, [] { return 888; });  // key 8 must still be usable after the failed attempt
+    EXPECT_EQ(fetched, 888);
+}
diff --git a/test/meson.build b/test/meson.build
new file mode 100644
index 0000000..e0a4988
--- /dev/null
+++ b/test/meson.build
@@ -0,0 +1,58 @@
+tests = [  # one test executable per entry, built from '<name>.cpp' plus 'tools.cpp'
+    'lrucache',
+    'cluster',
+    'creator',
+    'dirent',
+    'header',
+    'uuid',
+    'template',
+    'archive',
+    'iterator',
+    'reader',
+    'find',
+    'compression',
+    'dirent_lookup',
+    'istreamreader',
+    'decoderstreamreader',
+    'rawstreamreader',
+    'bufferstreamer',
+    'parseLongPath',
+    'random',
+    'tooltesting',
+    'tinyString',
+    'suggestion_iterator',
+    'indexing_criteria'
+]
+
+if xapian_dep.found()  # these tests are only added when the Xapian dependency is found
+    tests += ['search', 'defaultIndexdata', 'search_iterator', 'suggestion']
+endif
+
+datadir = get_option('test_data_dir')
+if datadir == 'none'  # 'none': compile the tests with WITH_TEST_DATA=0
+    test_cpp_args = '-DWITH_TEST_DATA=0'
+else
+    test_cpp_args = '-DWITH_TEST_DATA=1'
+    if datadir == ''
+        # We need to download the test data.
+        datadir = join_paths(meson.current_build_dir(), 'data')
+    endif
+    run_target('download_test_data', command : [test_data_downloader, '--remove-top-dir', datadir])
+endif
+
+testenv = environment()
+testenv.set('ZIM_TEST_DATA_DIR', datadir)  # passed to every test through 'env: testenv' below
+
+if gtest_dep.found() and not meson.is_cross_build()  # tests need gtest and are skipped for cross builds
+    foreach test_name : tests
+        test_exe = executable(test_name, [test_name+'.cpp', 'tools.cpp'],
+                              implicit_include_directories: false,
+                              include_directories: [include_directory, src_directory],
+                              link_with: libzim,
+                              link_args: extra_link_args,
+                              cpp_args: test_cpp_args,
+                              dependencies: deps + [gtest_dep],
+                              build_rpath: '$ORIGIN')
+        test(test_name, test_exe, timeout : 120, env: testenv)
+    endforeach
+endif
diff --git a/test/parseLongPath.cpp b/test/parseLongPath.cpp
new file mode 100644
index 0000000..2ee7c03
--- /dev/null
+++ b/test/parseLongPath.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2020 Matthieu Gautier mgautier@kymeria.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "gtest/gtest.h"
+#include <string>
+#include <tuple>
+
+namespace zim {  // the function under test is declared here directly; this file includes no project header
+  std::tuple<char, std::string> parseLongPath(const std::string& longPath);  // returns (namespace char, path), see tests below
+};
+
+using namespace zim;
+
+namespace
+{
+TEST(ParseLongPathTest, invalid)  // every input below must be rejected with std::runtime_error
+{
+  ASSERT_THROW(parseLongPath(""), std::runtime_error);  // empty input
+  ASSERT_THROW(parseLongPath("AB"), std::runtime_error);  // two characters, no separator
+  ASSERT_THROW(parseLongPath("AB/path"), std::runtime_error);  // two characters before the separator
+  ASSERT_THROW(parseLongPath("/"), std::runtime_error);  // separator only
+  ASSERT_THROW(parseLongPath("//"), std::runtime_error);
+  ASSERT_THROW(parseLongPath("/AB"), std::runtime_error);
+  ASSERT_THROW(parseLongPath("AB/"), std::runtime_error);
+  ASSERT_THROW(parseLongPath("/AB/path"), std::runtime_error);
+  ASSERT_THROW(parseLongPath("//A/path"), std::runtime_error);  // two leading separators
+}
+
+TEST(ParseLongPathTest, valid)  // each input is split into a namespace character and a path
+{
+  char ns;
+  std::string path;
+
+  std::tie(ns, path) = parseLongPath("A/path");
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "path");
+
+  std::tie(ns, path) = parseLongPath("A/p");
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "p");
+
+  std::tie(ns, path) = parseLongPath("/B/path");  // a single leading '/' is accepted
+  ASSERT_EQ(ns, 'B');
+  ASSERT_EQ(path, "path");
+
+  std::tie(ns, path) = parseLongPath("/B/p");
+  ASSERT_EQ(ns, 'B');
+  ASSERT_EQ(path, "p");
+
+  std::tie(ns, path) = parseLongPath("C//path");  // the second '/' belongs to the path
+  ASSERT_EQ(ns, 'C');
+  ASSERT_EQ(path, "/path");
+
+  std::tie(ns, path) = parseLongPath("/C//path");
+  ASSERT_EQ(ns, 'C');
+  ASSERT_EQ(path, "/path");
+
+  std::tie(ns, path) = parseLongPath("L/path/with/separator");  // later separators stay in the path
+  ASSERT_EQ(ns, 'L');
+  ASSERT_EQ(path, "path/with/separator");
+
+  std::tie(ns, path) = parseLongPath("L//path/with/separator");
+  ASSERT_EQ(ns, 'L');
+  ASSERT_EQ(path, "/path/with/separator");
+
+  std::tie(ns, path) = parseLongPath("A");  // namespace only: the path is empty
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "");
+
+  std::tie(ns, path) = parseLongPath("/A");
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "");
+
+  std::tie(ns, path) = parseLongPath("A/");  // trailing separator: the path is still empty
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "");
+
+  std::tie(ns, path) = parseLongPath("/A/");
+  ASSERT_EQ(ns, 'A');
+  ASSERT_EQ(path, "");
+}
+};
diff --git a/test/random.cpp b/test/random.cpp
new file mode 100644
index 0000000..1a0cb2c
--- /dev/null
+++ b/test/random.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier mgautier@kymeria.fr
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "gtest/gtest.h"
+
+namespace zim {  // the function under test is declared here directly; this file includes no project header
+  uint32_t randomNumber(uint32_t max);  // the tests below expect a result in [0, max]
+};
+
+using namespace zim;
+
+namespace
+{
+TEST(Random, smallMax)
+{
+  for(auto i=0; i<1000; i++) {  // with max == 0 the only admissible result is 0
+    ASSERT_EQ(randomNumber(0), 0U);
+  }
+
+  // With max == 1 only the upper bound needs checking: the result is unsigned.
+  for(auto i=0; i<1000; i++) {
+    const uint32_t r = randomNumber(1);
+    ASSERT_LE(r, 1U);
+  }
+}
+
+TEST(Random, distribution)
+{
+  const uint32_t NB_NUMBERS = 1000000;
+  const uint32_t NB_BUCKETS = 100;
+  const uint32_t BUCKET_SIZE = NB_NUMBERS/NB_BUCKETS;
+  const uint32_t MAX_RANDOM = 1000000;
+  std::vector<uint32_t> distribution(NB_BUCKETS);
+
+  for (auto i=0U; i<NB_NUMBERS; i++) {
+    const uint32_t r = randomNumber(MAX_RANDOM);
+    ASSERT_LE(r, MAX_RANDOM);  // also keeps the vector subscript below in range
+    uint64_t bucket_index = static_cast<uint64_t>(r) * NB_BUCKETS / MAX_RANDOM;  // integer math, exact at bucket edges
+    if (bucket_index == NB_BUCKETS) {  // only happens when r == MAX_RANDOM
+      bucket_index = NB_BUCKETS-1;
+    }
+    distribution[bucket_index]++;
+  }
+  // Each bucket should hold around BUCKET_SIZE elements.
+  // Check that every bucket is within 10% of that value.
+  for(auto nbElement:distribution) {
+    ASSERT_GT(nbElement, BUCKET_SIZE*0.9);
+    ASSERT_LT(nbElement, BUCKET_SIZE*1.1);
+  }
+}
+
+
+};
diff --git a/test/rawstreamreader.cpp b/test/rawstreamreader.cpp
new file mode 100644
index 0000000..ec1bdb4
--- /dev/null
+++ b/test/rawstreamreader.cpp
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "rawstreamreader.h"
+#include "buffer.h"
+#include "buffer_reader.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using namespace zim;
+
+std::string toString(const Buffer& buffer)  // copies the buffer's bytes into a std::string
+{
+  return std::string(buffer.data(), buffer.data() + buffer.size().v);
+}
+
+TEST(ReaderDataStreamWrapper, shouldJustWork)
+{
+  char bytes[] = "abcdefghijklmnopqrstuvwxyz";
+  toLittleEndian(uint32_t(1234), bytes);           // overwrites bytes[0..3]
+  toLittleEndian(int64_t(-987654321), bytes+18);   // overwrites bytes[18..25]
+
+  auto  source = std::make_shared<BufferReader>(Buffer::makeBuffer(bytes, zsize_t(sizeof(bytes))));
+
+  RawStreamReader stream(source);
+
+  ASSERT_EQ(1234,         stream.read<uint32_t>());
+  auto chunk = stream.sub_reader(zsize_t(4))->get_buffer(offset_t(0), zsize_t(4));
+  ASSERT_EQ("efgh",       toString(chunk));
+  chunk = stream.sub_reader(zsize_t(10))->get_buffer(offset_t(0), zsize_t(10));
+  ASSERT_EQ("ijklmnopqr", toString(chunk));
+  ASSERT_EQ(-987654321,   stream.read<int64_t>());
+}
+
+} // unnamed namespace
diff --git a/test/reader.cpp b/test/reader.cpp
new file mode 100644
index 0000000..183d857
--- /dev/null
+++ b/test/reader.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "tools.h"
+#include "buffer_reader.h"
+#include "file_reader.h"
+#include "fs.h"
+#include "file_compound.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using namespace zim;
+using zim::unittests::makeTempFile;
+
+////////////////////////////////////////////////////////////////////////////////
+// FileReader
+////////////////////////////////////////////////////////////////////////////////
+
+std::unique_ptr<Reader> createFileReader(const char* data, zsize_t size) {
+  const auto tempFile = makeTempFile("data", data);  // temporary file created from `data`
+  auto sharedFd = std::make_shared<typename DEFAULTFS::FD>(DEFAULTFS::openFile(tempFile->path()));
+  return std::unique_ptr<Reader>(new FileReader(sharedFd, offset_t(0), size));
+}
+
+std::unique_ptr<Reader> createMultiFileReader(const char* data, zsize_t size) {
+  const auto tempFile = makeTempFile("data", data);  // `size` is not used by this factory
+  auto parts = std::make_shared<FileCompound>(tempFile->path());
+  return std::unique_ptr<Reader>(new MultiPartFileReader(parts));
+}
+
+std::unique_ptr<Reader> createBufferReader(const char* data, zsize_t size) {
+  auto wrapped = Buffer::makeBuffer(data, size);  // Buffer made directly from `data` and `size`, no temp file
+  return std::unique_ptr<Reader>(new BufferReader(wrapped));
+}
+
+auto createReaders = {  // reader factories; each TEST below loops over all of them
+  createFileReader,
+  createMultiFileReader,
+  createBufferReader  // the tests compare against this entry to pick the expected base offset
+};
+
+TEST(FileReader, shouldJustWork)
+{
+  char data[] = "abcdefghijklmnopqrstuvwxyz";
+  for(auto& factory:createReaders) {
+    auto expectedBase = factory==createBufferReader ? ((offset_type)data) : 0;
+    auto reader = factory(data, zsize_t(26));
+
+    ASSERT_EQ(offset_t(expectedBase+0), reader->offset());
+    ASSERT_EQ(zsize_t(sizeof(data)-1), reader->size());
+
+    ASSERT_EQ('a', reader->read(offset_t(0)));
+    ASSERT_EQ('e', reader->read(offset_t(4)));
+
+    char scratch[4] = {0, 0, 0, 0};  // reused: shorter reads overwrite only its first bytes
+    reader->read(scratch, offset_t(0), zsize_t(4));
+    ASSERT_EQ(0, memcmp(scratch, "abcd", 4));
+
+    reader->read(scratch, offset_t(5), zsize_t(2));
+    ASSERT_EQ(0, memcmp(scratch, "fgcd", 4));
+
+    reader->read(scratch, offset_t(10), zsize_t(0));
+    ASSERT_EQ(0, memcmp(scratch, "fgcd", 4));
+
+    reader->read(scratch, offset_t(10), zsize_t(4));
+    ASSERT_EQ(0, memcmp(scratch, "klmn", 4));
+
+    // The very last byte of the data is readable.
+    ASSERT_EQ('z', reader->read(offset_t(25)));
+    reader->read(scratch, offset_t(25), zsize_t(1));
+    ASSERT_EQ(0, memcmp(scratch, "zlmn", 4));
+
+    // Any read reaching past the end must throw.
+    ASSERT_THROW(reader->read(offset_t(26)), std::runtime_error);
+    ASSERT_THROW(reader->read(scratch, offset_t(25), zsize_t(4)), std::runtime_error);
+    ASSERT_THROW(reader->read(scratch, offset_t(30), zsize_t(4)), std::runtime_error);
+    ASSERT_THROW(reader->read(scratch, offset_t(30), zsize_t(0)), std::runtime_error);
+  }
+}
+
+TEST(FileReader, subReader)
+{
+  char data[] = "abcdefghijklmnopqrstuvwxyz";
+  for(auto& factory:createReaders) {
+    auto expectedBase = factory==createBufferReader ? ((offset_type)data) : 0;
+    auto parent = factory(data, zsize_t(26));
+
+    auto window = parent->sub_reader(offset_t(4), zsize_t(20));  // covers "efgh...wx"
+
+    ASSERT_EQ(offset_t(expectedBase+4), window->offset());
+    ASSERT_EQ(zsize_t(20), window->size());
+
+    ASSERT_EQ('e', window->read(offset_t(0)));
+    ASSERT_EQ('i', window->read(offset_t(4)));
+
+    char scratch[4] = {0, 0, 0, 0};  // reused: shorter reads overwrite only its first bytes
+    window->read(scratch, offset_t(0), zsize_t(4));
+    ASSERT_EQ(0, memcmp(scratch, "efgh", 4));
+
+    window->read(scratch, offset_t(5), zsize_t(2));
+    ASSERT_EQ(0, memcmp(scratch, "jkgh", 4));
+
+    // The very last byte of the sub-reader is readable.
+    ASSERT_EQ('x', window->read(offset_t(19)));
+    window->read(scratch, offset_t(19), zsize_t(1));
+    ASSERT_EQ(0, memcmp(scratch, "xkgh", 4));
+
+    // Any read reaching past the end of the sub-reader must throw.
+    ASSERT_THROW(window->read(offset_t(20)), std::runtime_error);
+    ASSERT_THROW(window->read(scratch, offset_t(18), zsize_t(4)), std::runtime_error);
+    ASSERT_THROW(window->read(scratch, offset_t(30), zsize_t(4)), std::runtime_error);
+    ASSERT_THROW(window->read(scratch, offset_t(30), zsize_t(0)), std::runtime_error);
+  }
+}
+
+TEST(FileReader, zeroReader)
+{
+  char data[] = "";
+  for(auto& factory:createReaders) {
+    auto expectedBase = factory==createBufferReader ? ((offset_type)data) : 0;
+    auto emptyReader = factory(data, zsize_t(0));
+
+    ASSERT_EQ(offset_t(expectedBase), emptyReader->offset());
+    ASSERT_EQ(zsize_t(0), emptyReader->size());
+
+    // Reading even one byte from an empty reader must throw.
+    ASSERT_THROW(emptyReader->read(offset_t(0)), std::runtime_error);
+    char scratch[4] = {0, 0, 0, 0};
+    ASSERT_THROW(emptyReader->read(scratch, offset_t(0), zsize_t(4)), std::runtime_error);
+
+    // A zero-byte read on a zero-sized reader is fine and leaves the buffer untouched.
+    emptyReader->read(scratch, offset_t(0), zsize_t(0));
+    const char allZero[] = {0, 0, 0, 0};
+    ASSERT_EQ(0, memcmp(scratch, allZero, 4));
+  }
+}
+
+} // unnamed namespace
diff --git a/test/search.cpp b/test/search.cpp
new file mode 100644
index 0000000..5a0d0bb
--- /dev/null
+++ b/test/search.cpp
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/item.h>
+#include <zim/search.h>
+
+#include <xapian.h>
+
+#include "tools.h"
+#include "gtest/gtest.h"
+
+namespace
+{
+
+using zim::unittests::TempZimArchive;
+using zim::unittests::TestItem;
+
+std::vector<std::string> getSnippet(const zim::Archive& archive, const std::string& query, int range) {
+  // Runs a full-text search for `query` on `archive` and returns the snippets of the first `range` results, in result order.
+  zim::Searcher searcher(archive);
+  auto search = searcher.search(zim::Query(query));
+  auto result = search.getResults(0, range);
+
+  std::vector<std::string> snippets;
+  for (auto entry = result.begin(); entry != result.end(); ++entry) {
+    snippets.push_back(entry.getSnippet());
+  }
+  return snippets;
+}
+
+#define EXPECT_SNIPPET_EQ(archive, range, query, ...)           \
+  ASSERT_EQ( /* fatal on mismatch, despite the EXPECT_ prefix */ \
+    getSnippet(archive, query, range),                          \
+    std::vector<std::string>({__VA_ARGS__})                     \
+  )
+
+// To keep new zim files compatible with older kiwix versions, the full path
+// of each entry has to be indexed as the data of its document.
+TEST(Search, indexFullPath)
+{
+  TempZimArchive tempZim("testZim");
+  zim::writer::Creator zimCreator;
+  zimCreator.configIndexing(true, "en");
+  zimCreator.startZimCreation(tempZim.getPath());
+
+  // A single indexed article is enough to inspect what is stored in the index.
+  zimCreator.addItem(std::make_shared<TestItem>("testPath", "text/html", "Test Article", "This is a test article"));
+
+  zimCreator.setMainPath("testPath");
+  zimCreator.addMetadata("Title", "Test zim");
+  zimCreator.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  zim::Searcher searcher(archive);
+  auto search = searcher.search(zim::Query("test article"));
+  // The query has to hit the article before its stored data can be checked.
+  ASSERT_NE(0, search.getEstimatedMatches());
+  auto results = search.getResults(0, archive.getEntryCount());
+  auto firstHit = results.begin();
+  ASSERT_EQ(firstHit.getPath(), "testPath");
+  ASSERT_EQ(firstHit.getDbData().substr(0, 2), "C/");
+}
+
+TEST(Search, fulltextSnippet)
+{
+  // Build an indexed archive holding a single article.
+  TempZimArchive tempZim("testZim");
+  zim::writer::Creator zimCreator;
+  zimCreator.configIndexing(true, "en");
+  zimCreator.startZimCreation(tempZim.getPath());
+  zimCreator.addItem(std::make_shared<TestItem>("testPath", "text/html", "Test Article", "this is the content of a random paragraph without any context"));
+
+  zimCreator.setMainPath("testPath");
+  zimCreator.addMetadata("Title", "Test zim");
+  zimCreator.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  // One result is requested; each query word must come back wrapped in
+  // <b>..</b> inside the snippet.
+  EXPECT_SNIPPET_EQ(
+    archive, 1, "random paragraph context",
+    {
+      "this is the content of a <b>random</b> <b>paragraph</b> without any <b>context</b>"
+    }
+  );
+}
+
+TEST(Search, multiSearch)
+{
+  TempZimArchive tza("testZim");
+
+  zim::writer::Creator creator;
+  creator.configIndexing(true, "en");
+  creator.startZimCreation(tza.getPath());
+  creator.addItem(std::make_shared<TestItem>("path0", "text/html", "Test Article0", "This is a test article. temp0"));
+  creator.addItem(std::make_shared<TestItem>("path1", "text/html", "Test Article1", "This is another test article. For article1."));
+  creator.addItem(std::make_shared<TestItem>("path2", "text/html", "Test Article001", "This is a test article. Super. temp0"));
+  creator.addItem(std::make_shared<TestItem>("path3", "text/html", "Test Article2", "This is a test article. Super."));
+  creator.addItem(std::make_shared<TestItem>("path4", "text/html", "Test Article23", "This is a test article. bis."));
+
+  creator.setMainPath("path0");
+  creator.finishZimCreation();
+
+  zim::Archive archive(tza.getPath());
+
+  zim::Searcher searcher(archive);
+  // The same zim file is opened a second time and added to the searcher as well.
+  zim::Archive archive2(tza.getPath());
+  searcher.addArchive(archive2);
+
+  searcher.setVerbose(true);
+  zim::Query query("test article");
+  auto search0 = searcher.search(query);
+  // Even with the file added twice, the estimate must equal the entry count of one archive.
+  ASSERT_EQ(archive.getEntryCount(), search0.getEstimatedMatches());
+  auto result0 = search0.getResults(0, 2);
+  ASSERT_EQ(result0.size(), 2);
+  auto it0 = result0.begin();
+
+  auto result1 = search0.getResults(0, 5);
+  ASSERT_EQ(result1.size(), 5);
+  auto it1 = result1.begin();
+  // Both result sets start at offset 0, so their first two entries must be the same.
+  ASSERT_EQ(it0.getPath(), it1.getPath());
+  it0++; it1++;
+  ASSERT_EQ(it0.getPath(), it1.getPath());
+  it0++; it1++;
+  ASSERT_EQ(it0, result0.end());
+  it1++;it1++;it1++;
+  ASSERT_EQ(it1, result1.end());
+
+  // Check result retrieval in start ranges
+  auto result2 = search0.getResults(0, 3);    // Should return 3 results
+  ASSERT_EQ(result2.size(), 3);
+
+  // Check result retrieval in middle ranges
+  auto result3 = search0.getResults(2, 3);    // Should return 3 results
+  ASSERT_EQ(result3.size(), 3);
+
+  // Be able to do a different search using the same searcher.
+  query.setQuery("super");
+  auto search1 = searcher.search(query);
+  ASSERT_EQ(2, search1.getEstimatedMatches());
+  // A copy of the searcher must be usable for a further search too.
+  auto searcher2(searcher);
+  searcher2.setVerbose(true);
+  query.setQuery("temp0");
+  auto search2 = searcher2.search(query);
+  auto result = search2.getResults(0, search2.getEstimatedMatches());
+  ASSERT_EQ(2, search2.getEstimatedMatches());
+  ASSERT_EQ(2, result.size());
+}
+
+TEST(Search, noFTIndex)
+{
+  TempZimArchive tempZim("testZim");
+  // Indexing is switched off when this archive is created.
+  zim::writer::Creator zimCreator;
+  zimCreator.configIndexing(false, "en");
+  zimCreator.startZimCreation(tempZim.getPath());
+  auto article = std::make_shared<TestItem>("path0", "text/html", "Test Article0", "This is a test article. temp0");
+  zimCreator.addItem(article);
+  zimCreator.setMainPath("path0");
+  zimCreator.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+  zim::Searcher searcher(archive);
+  searcher.setVerbose(true);
+  // Searching such an archive must be reported as a runtime error.
+  const zim::Query textQuery("test article");
+  ASSERT_THROW(searcher.search(textQuery), std::runtime_error);
+}
+
+TEST(Search, noStemming)
+{
+  TempZimArchive tempZim("testZim");
+
+  // "nostem" is passed as the indexing language for this archive.
+  zim::writer::Creator zimCreator;
+  zimCreator.configIndexing(true, "nostem");
+  zimCreator.startZimCreation(tempZim.getPath());
+  zimCreator.addItem(std::make_shared<TestItem>("path0", "text/html", "Test Article0", "This is a test article. temp0"));
+  zimCreator.addItem(std::make_shared<TestItem>("path1", "text/html", "Test Article1", "This is another test article. For article1."));
+  zimCreator.setMainPath("path0");
+  zimCreator.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  // The searcher starts without any archive; the archive is attached afterwards.
+  zim::Searcher searcher(std::vector<zim::Archive>{});
+  searcher.addArchive(archive);
+  searcher.setVerbose(true);
+
+  auto search = searcher.search(zim::Query("test article"));
+  // The estimate must equal the archive's entry count, with "Test Article0" as first result.
+  ASSERT_EQ(archive.getEntryCount(), search.getEstimatedMatches());
+  auto topResult = search.getResults(0, 1);
+  ASSERT_EQ(topResult.begin().getTitle(), "Test Article0");
+}
+
+TEST(Search, geoQuery)
+{
+  TempZimArchive tempZim("testZim");
+
+  // The page gives its coordinates through a "geo.position" meta tag.
+  std::string pageHtml = R"(<html><head><meta name="keywords" content="some keyword important"><meta name="geo.position" content="45.000;10.000"></head><body>Test geoquery</body><html>)";
+  zim::writer::Creator zimCreator;
+  zimCreator.configIndexing(true, "en");
+  zimCreator.startZimCreation(tempZim.getPath());
+  zimCreator.addItem(std::make_shared<TestItem>("path0", "text/html", "Test Article", pageHtml));
+  zimCreator.setMainPath("path0");
+  zimCreator.finishZimCreation();
+
+  zim::Archive archive(tempZim.getPath());
+
+  zim::Searcher searcher(archive);
+  searcher.setVerbose(true);
+
+  // Text query restricted to a geo range centred on the coordinates given in the page.
+  zim::Query rangedQuery("geoquery");
+  rangedQuery.setGeorange(45.000, 10.000, 100);
+  auto search = searcher.search(rangedQuery);
+  ASSERT_EQ(archive.getEntryCount(), search.getEstimatedMatches());
+  auto topResult = search.getResults(0, 1);
+  ASSERT_EQ(topResult.begin().getTitle(), "Test Article");
+}
+} // unnamed namespace
diff --git a/test/search_iterator.cpp b/test/search_iterator.cpp
new file mode 100644
index 0000000..7754b84
--- /dev/null
+++ b/test/search_iterator.cpp
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/search.h>
+#include <zim/search_iterator.h>
+#include <zim/error.h>
+#include "tools.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+using zim::unittests::TempZimArchive;
+
+TEST(search_iterator, uninitialized) {
+  zim::SearchResultSet::iterator defaultIt;  // default-constructed, never bound to a result set
+  ASSERT_EQ(defaultIt.getTitle(), "");       // string accessors yield empty strings
+  ASSERT_EQ(defaultIt.getPath(), "");
+  ASSERT_EQ(defaultIt.getSnippet(), "");
+  ASSERT_EQ(defaultIt.getScore(), 0);        // numeric accessors yield 0 or -1
+  ASSERT_EQ(defaultIt.getFileIndex(), 0);
+  ASSERT_EQ(defaultIt.getWordCount(), -1);
+  ASSERT_EQ(defaultIt.getSize(), -1);
+  ASSERT_THROW(defaultIt.getZimId(), std::runtime_error);  // the remaining accessors must throw
+  ASSERT_THROW(*defaultIt, std::runtime_error);
+  ASSERT_THROW(defaultIt.operator->(), std::runtime_error);
+}
+
+TEST(search_iterator, end) {
+  TempZimArchive tempZim("testZim");
+
+  zim::Archive archive = tempZim.createZimFromContent({ {"article 1", "item a"} });
+
+  zim::Searcher searcher(archive);
+  auto search = searcher.search(zim::Query("item"));
+  auto results = search.getResults(0, archive.getEntryCount());
+
+  // The past-the-end iterator refers to no result: some accessors throw,
+  // the others return a fixed fallback value.
+  auto endIt = results.end();
+
+  ASSERT_THROW(endIt.getTitle(), std::runtime_error);
+  ASSERT_THROW(endIt.getPath(), std::runtime_error);
+  ASSERT_EQ(endIt.getSnippet(), "");
+  // Not asserted because the value is unspecified (may be 0 or 1). To fix:
+  //   ASSERT_EQ(endIt.getScore(), 0);
+  ASSERT_EQ(endIt.getFileIndex(), 0);
+  ASSERT_THROW(endIt.getWordCount(), std::runtime_error);
+  ASSERT_EQ(endIt.getSize(), -1);
+  ASSERT_THROW(*endIt, std::runtime_error);
+  ASSERT_THROW(endIt.operator->(), std::runtime_error);
+}
+
+TEST(search_iterator, copy) {
+  TempZimArchive tempZim("testZim");
+
+  zim::Archive archive = tempZim.createZimFromContent({ {"article 1", "item a"} });
+
+  zim::Searcher searcher(archive);
+  zim::Query itemQuery(std::string("item"));
+  auto search = searcher.search(itemQuery);
+  auto results = search.getResults(0, archive.getEntryCount());
+
+  // Copy construction: the copy must report the same title as its source.
+  auto source = results.begin();
+  auto duplicate = source;
+  ASSERT_EQ(source.getTitle(), duplicate.getTitle());
+
+  // Copy assignment of an end iterator: both iterators must compare equal
+  // and both must throw on getTitle().
+  source = results.end();
+  duplicate = source;
+  ASSERT_EQ(source, duplicate);
+  ASSERT_THROW(source.getTitle(), std::runtime_error);
+  ASSERT_THROW(duplicate.getTitle(), std::runtime_error);
+}
+
+TEST(search_iterator, functions) {
+  TempZimArchive tza("testZim");
+  // The bodies contain "item" 3, 2 and 1 times; per the note below this is what orders the results.
+  zim::Archive archive = tza.createZimFromContent({
+    {"item a", "item item item"},
+    {"Item B", "item item 2"},
+    {"iTem Ä", "item number 3"}  // forcing an order using wdf
+  });
+
+  zim::Searcher searcher(archive);
+  zim::Query query("item");
+  auto search = searcher.search(query);
+  auto result = search.getResults(0, archive.getEntryCount());
+
+  auto it = result.begin();
+
+  // Check every accessor on the first result
+  ASSERT_EQ(it.getTitle(), "item a");
+  ASSERT_EQ(it.getPath(), "dummyPathitem a");
+  ASSERT_EQ(it.getScore(), 100);
+  ASSERT_EQ(it.getFileIndex(), 0);
+  ASSERT_EQ(it.getZimId(), archive.getUuid());
+  ASSERT_EQ(it.getWordCount(), 3);
+  ASSERT_EQ(it.getSize(), -1);                 // Unimplemented
+
+  // Check that getTitle returns cased/accented titles unchanged
+  it++;
+  ASSERT_EQ(it.getTitle(), "Item B");
+  it++;
+  ASSERT_EQ(it.getTitle(), "iTem Ä");
+}
+
+TEST(search_iterator, iteration) {
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article 1", "item"},
+    {"article 2", "another item in article 2"}  // different wdf
+  });
+
+  zim::Searcher searcher(archive);
+  auto search = searcher.search(std::string("item"));
+  auto result = search.getResults(0, archive.getEntryCount());
+  // A fresh begin() iterator must agree with another begin() iterator.
+  auto it = result.begin();
+  ASSERT_EQ(it.getTitle(), result.begin().getTitle());
+
+  ASSERT_EQ(it.getTitle(), "article 1");
+  it++;
+  ASSERT_EQ(it.getTitle(), "article 2");
+  ASSERT_TRUE(it != result.begin());
+  // Stepping back must land on the first result again.
+  it--;
+  ASSERT_EQ(it.getTitle(), "article 1");
+  ASSERT_TRUE(result.begin() == it);
+  // Two steps forward from the first of the two results must reach end().
+  it++; it++;
+  ASSERT_TRUE(it == result.end());
+}
+
+} // anonymous namespace
diff --git a/test/suggestion.cpp b/test/suggestion.cpp
new file mode 100644
index 0000000..31ee727
--- /dev/null
+++ b/test/suggestion.cpp
@@ -0,0 +1,604 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+
+#include <zim/archive.h>
+#include <zim/suggestion.h>
+#include <zim/item.h>
+
+#include "tools.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+  using zim::unittests::TempZimArchive;
+  using zim::unittests::TestItem;
+  using zim::unittests::getDataFilePath;
+
+  std::vector<std::string> getSuggestions(const zim::Archive& archive, const std::string& query, int range) {
+    // Returns the titles of the first `range` suggestions for `query` on `archive`, in result order.
+    zim::SuggestionSearcher suggestionSearcher(archive);
+    suggestionSearcher.setVerbose(true);
+    auto suggestionSearch = suggestionSearcher.suggest(query);
+    auto suggestionResult = suggestionSearch.getResults(0, range);
+    std::vector<std::string> result;
+    for (const auto& entry : suggestionResult) {
+      result.push_back(entry.getTitle());
+    }
+    return result;
+  }
+
+  std::vector<std::string> getSnippet(const zim::Archive& archive, const std::string& query, int range) {
+    // Returns the snippets of the first `range` suggestions for `query` on `archive`, in result order.
+    zim::SuggestionSearcher suggestionSearcher(archive);
+    auto suggestionSearch = suggestionSearcher.suggest(query);
+    auto result = suggestionSearch.getResults(0, range);
+    std::vector<std::string> snippets;
+    for (const auto& entry : result) {
+      snippets.push_back(entry.getSnippet());
+    }
+    return snippets;
+  }
+
+#define EXPECT_SUGGESTION_RESULTS(archive, query, ...)          \
+  ASSERT_EQ( /* fatal on mismatch; `archive` is expanded twice */ \
+      getSuggestions(archive, query, archive.getEntryCount()),  \
+      std::vector<std::string>({__VA_ARGS__})                   \
+  )
+
+#define EXPECT_SNIPPET_EQ(archive, range, query, ...)           \
+  ASSERT_EQ( /* fatal on mismatch, despite the EXPECT_ prefix */ \
+    getSnippet(archive, query, range),                          \
+    std::vector<std::string>({__VA_ARGS__})                     \
+  )
+
+#if WITH_TEST_DATA
+TEST(Suggestion, searchByTitle)
+{
+  for (auto& dataFile : getDataFilePath("small.zim")) {  // each file returned for small.zim
+    const zim::Archive archive(dataFile.path);
+    ASSERT_TRUE(archive.hasTitleIndex());
+    const auto mainItem = archive.getMainEntry().getItem(true);
+    zim::SuggestionSearcher searcher(archive);
+    auto search = searcher.suggest(mainItem.getTitle());  // suggest on the main item's own title
+    ASSERT_NE(0, search.getEstimatedMatches());
+    auto suggestions = search.getResults(0, archive.getEntryCount());
+    ASSERT_EQ(mainItem.getPath(), suggestions.begin()->getPath());  // the main item must come first
+  }
+}
+#endif
+
+
+  TEST(Suggestion, emptyQuery) {
+    // An empty query string must not produce any suggestion.
+    std::vector<std::string> titles = {
+      "fooland",
+      "berlin wall",
+      "hotel berlin, berlin",
+      "again berlin",
+      "berlin",
+      "not berlin"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const auto suggestions = getSuggestions(archive, "", archive.getEntryCount());
+    const std::vector<std::string> noSuggestion = {};
+    ASSERT_EQ(suggestions, noSuggestion);
+  }
+
+  TEST(Suggestion, noResult) {
+    std::vector<std::string> titles = {
+                                        "fooland",
+                                        "berlin wall",
+                                        "hotel berlin, berlin",
+                                        "again berlin",
+                                        "berlin",
+                                        "not berlin"
+                                      };
+    // None of the titles contains the query word "none", so no suggestion is expected.
+    TempZimArchive tza("testZim");
+    const zim::Archive archive = tza.createZimFromTitles(titles);
+
+    std::vector<std::string> resultSet = getSuggestions(archive, "none", archive.getEntryCount());
+    std::vector<std::string> expectedResult = {};
+
+    ASSERT_EQ(resultSet, expectedResult);
+  }
+
+  TEST(Suggestion, singleTermOrder) {
+    // Titles containing the word "berlin" must come back in the order listed
+    // in `expectedOrder`; "fooland" does not contain it and must be absent.
+    std::vector<std::string> titles = {
+      "fooland",
+      "berlin wall",
+      "hotel berlin, berlin",
+      "again berlin",
+      "berlin",
+      "not berlin"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const std::vector<std::string> expectedOrder = {
+      "berlin",
+      "berlin wall",
+      "hotel berlin, berlin",
+      "again berlin",
+      "not berlin"
+    };
+    ASSERT_EQ(expectedOrder, getSuggestions(archive, "berlin", archive.getEntryCount()));
+  }
+
+  TEST(Suggestion, caseDiacriticsAndHomogrpaphsHandling) {
+    std::vector<std::string> titles = {
+                                        "nonberlin",
+                                        "simply berlin",
+                                        "accented bÃ©rlin",
+                                        "uppercase BERLIN",
+                                        "homograph bÐµrlin", // Ðµ is cyrillic
+                                      };
+
+    TempZimArchive tza("testZim");
+    const zim::Archive archive = tza.createZimFromTitles(titles);
+
+    const std::vector<std::string> expectedResult{
+                                                   "accented bÃ©rlin",
+                                                   "simply berlin",
+                                                   "uppercase BERLIN",
+                                                 };
+
+    ASSERT_EQ(getSuggestions(archive, "berlin", archive.getEntryCount()),
+              expectedResult
+    );
+
+    ASSERT_EQ(getSuggestions(archive, "BERLIN", archive.getEntryCount()),
+              expectedResult
+    );
+
+    ASSERT_EQ(getSuggestions(archive, "bÃªÅlÃ¯Ã±", archive.getEntryCount()),
+              expectedResult
+    );
+
+    // Ðµ in the query string "bÐµrlin" below is a cyrillic character
+    ASSERT_EQ(getSuggestions(archive, "bÐµrlin", archive.getEntryCount()),
+              std::vector<std::string>{"homograph bÐµrlin"}
+    );
+  }
+
+  TEST(Suggestion, resultsGreaterThanLimit) {
+    // All five titles contain "foobar" but only two results are requested:
+    // the two listed in `expectedTop`.
+    std::vector<std::string> titles = {
+      "foobar b",
+      "foobar a",
+      "foobar c",
+      "foobar e",
+      "foobar d"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const int limit = 2;
+    const std::vector<std::string> expectedTop = { "foobar a", "foobar b" };
+    const auto suggestions = getSuggestions(archive, "foobar", limit);
+
+    ASSERT_EQ(expectedTop, suggestions);
+  }
+
+  TEST(Suggestion, partialQuery) {
+    // "Wo" is only the beginning of a word: the titles holding a word that
+    // starts with it (in any letter case) are expected, in the order below.
+    std::vector<std::string> titles = {
+      "The chocolate factory",
+      "The wolf of Shingashina",
+      "The wolf of Wall Street",
+      "Hour of the wolf",
+      "Wolf",
+      "Terma termb the wolf of wall street termc"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const std::vector<std::string> expectedOrder = {
+      "Wolf",
+      "Hour of the wolf",
+      "The wolf of Shingashina",
+      "The wolf of Wall Street",
+      "Terma termb the wolf of wall street termc"
+    };
+    const auto suggestions = getSuggestions(archive, "Wo", archive.getEntryCount());
+    ASSERT_EQ(expectedOrder, suggestions);
+  }
+
+  TEST(Suggestion, phraseOrder) {
+    // Only titles holding all three query words are expected; "summer" holds
+    // just one of them and "control document" none.
+    std::vector<std::string> titles = {
+      "summer winter autumn",
+      "winter autumn summer terma",
+      "autumn summer winter",
+      "control document",
+      "summer",
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const std::vector<std::string> expectedOrder = {
+      "winter autumn summer terma",
+      "autumn summer winter",
+      "summer winter autumn"
+    };
+    ASSERT_EQ(expectedOrder, getSuggestions(archive, "winter autumn summer", archive.getEntryCount()));
+  }
+
+  TEST(Suggestion, incrementalSearch) {
+    std::vector<std::string> titles = {
+                                        "The chocolate factory",
+                                        "The wolf of Shingashina",
+                                        "The wolf of Wall Street",
+                                        "The wolf among sheeps",
+                                        "The wolf of Wall Street Book" ,
+                                        "Hour of the wolf",
+                                        "Wolf",
+                                        "Terma termb the wolf of wall street termc"
+                                      };
+    // The query grows by one word per step; each step checks which titles are suggested and in which order.
+    std::vector<std::string> resultSet, expectedResult;
+
+    TempZimArchive tza("testZim");
+    const zim::Archive archive = tza.createZimFromTitles(titles);
+
+    // Single word "wolf" (queried as "Wolf")
+    resultSet = getSuggestions(archive, "Wolf", archive.getEntryCount());
+    expectedResult = {
+                       "Wolf",
+                       "Hour of the wolf",
+                       "The wolf among sheeps",
+                       "The wolf of Shingashina",
+                       "The wolf of Wall Street",
+                       "The wolf of Wall Street Book",
+                       "Terma termb the wolf of wall street termc"
+                     };
+
+    ASSERT_EQ(expectedResult, resultSet);
+
+    // Single word "the": every title except "Wolf" is expected
+    resultSet = getSuggestions(archive, "the", archive.getEntryCount());
+    expectedResult = {
+                       "The chocolate factory",
+                       "The wolf among sheeps",
+                       "The wolf of Shingashina",
+                       "The wolf of Wall Street",
+                       "The wolf of Wall Street Book",
+                       "Hour of the wolf",
+                       "Terma termb the wolf of wall street termc"
+                     };
+
+    ASSERT_EQ(expectedResult, resultSet);
+
+    // "the wolf": "The chocolate factory" and "Wolf" drop out
+    resultSet = getSuggestions(archive, "the wolf", archive.getEntryCount());
+    expectedResult = {
+                       "The wolf among sheeps",
+                       "The wolf of Shingashina",
+                       "The wolf of Wall Street",
+                       "The wolf of Wall Street Book",
+                       "Hour of the wolf",
+                       "Terma termb the wolf of wall street termc"
+                     };
+
+    ASSERT_EQ(expectedResult, resultSet);
+
+    // "the wolf of": "The wolf among sheeps" drops out
+    resultSet = getSuggestions(archive, "the wolf of", archive.getEntryCount());
+    expectedResult = {
+                       "The wolf of Shingashina",
+                       "The wolf of Wall Street",
+                       "The wolf of Wall Street Book",
+                       "Terma termb the wolf of wall street termc",
+                       "Hour of the wolf"
+                     };
+
+    ASSERT_EQ(expectedResult, resultSet);
+
+    // "the wolf of wall": only the titles containing "wall" remain
+    resultSet = getSuggestions(archive, "the wolf of wall", archive.getEntryCount());
+    expectedResult = {
+                       "The wolf of Wall Street",
+                       "The wolf of Wall Street Book",
+                       "Terma termb the wolf of wall street termc"
+                     };
+
+    ASSERT_EQ(expectedResult, resultSet);
+  }
+
+  TEST(Suggestion, phraseOutOfWindow) {
+    // "This query" lacks the words "the" and "dummy" and must not be
+    // suggested; the other three titles are expected in the order below.
+    std::vector<std::string> titles = {
+      "This query",
+      "This is the dummy query phrase",
+      "the aterm bterm dummy cterm query",
+      "aterm the bterm dummy query cterm"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    const std::vector<std::string> expectedOrder = {
+      "This is the dummy query phrase",
+      "aterm the bterm dummy query cterm",
+      "the aterm bterm dummy cterm query"
+    };
+    ASSERT_EQ(expectedOrder, getSuggestions(archive, "the dummy query", archive.getEntryCount()));
+  }
+
+  TEST(Suggestion, checkStopword) {
+    std::vector<std::string> titles = {
+      "she and the apple",
+      "apple",
+      "she and the"
+    };
+
+    TempZimArchive tempZim("testZim");
+    const zim::Archive archive = tempZim.createZimFromTitles(titles);
+
+    // "she", "and" and "the" are stopwords. When stopwords are handled properly
+    // they still take part in matching, so only the title holding every query
+    // word is expected.
+    const std::vector<std::string> expectedHit = { "she and the apple" };
+    const auto suggestions = getSuggestions(archive, "she and the apple", archive.getEntryCount());
+
+    ASSERT_EQ(expectedHit, suggestions);
+  }
+
+  TEST(Suggestion, checkRedirectionCollapse) {
+    // Two redirects point straight at the same article: the suggestions must
+    // list the article once and must not list the redirects.
+    TempZimArchive tempZim("testZim");
+    zim::writer::Creator zimCreator;
+    zimCreator.configIndexing(true, "en");
+    zimCreator.startZimCreation(tempZim.getPath());
+
+    zimCreator.addItem(std::make_shared<TestItem>("testPath", "text/html", "Article Target"));
+    zimCreator.addRedirection("redirectionPath1", "Article Redirect 1", "testPath");
+    zimCreator.addRedirection("redirectionPath2", "Article Redirect 2", "testPath");
+
+    zimCreator.addMetadata("Title", "Test zim");
+    zimCreator.finishZimCreation();
+
+    zim::Archive archive(tempZim.getPath());
+    const auto suggestions = getSuggestions(archive, "Article", archive.getEntryCount());
+
+    // Only the redirect target is expected.
+    const std::vector<std::string> onlyTarget = { "Article Target" };
+
+    ASSERT_EQ(suggestions, onlyTarget);
+  }
+
+  TEST(Suggestion, checkRedirectionChain) {
+    /*
+     * As of now, redirection chains are not handled. With articles such as
+     * A->B->C, even though A, B and C are essentially the same article, they
+     * won't get collapsed into one.
+     */
+    TempZimArchive tza("testZim");
+    zim::writer::Creator creator;
+    creator.configIndexing(true, "en");
+    creator.startZimCreation(tza.getPath());
+    // Chain built here: redirectionPath2 -> redirectionPath1 -> testPath.
+    auto item = std::make_shared<TestItem>("testPath", "text/html", "Article Target");
+    creator.addItem(item);
+    creator.addRedirection("redirectionPath1", "Article Redirect 1", "testPath");
+    creator.addRedirection("redirectionPath2", "Article Redirect 2", "redirectionPath1", {{zim::writer::FRONT_ARTICLE, 1}});
+
+    creator.addMetadata("Title", "Test zim");
+    creator.finishZimCreation();
+
+    zim::Archive archive(tza.getPath());
+    std::vector<std::string> resultSet = getSuggestions(archive, "Article", archive.getEntryCount());
+
+    // Two results are expected: the target and "Article Redirect 2" (the redirect to a redirect)
+    std::vector<std::string> expectedResult = {
+                                                "Article Target",
+                                                "Article Redirect 2"
+                                              };
+    ASSERT_EQ(resultSet, expectedResult);
+  }
+
+  // Distinct articles that share one title must each show up in the
+  // suggestions instead of being collapsed into a single entry.
+  TEST(Suggestion, diffArticleSameTitle) {
+    TempZimArchive tempZim("testZim");
+    zim::writer::Creator zimCreator;
+    zimCreator.configIndexing(true, "en");
+    zimCreator.startZimCreation(tempZim.getPath());
+
+    // Same title and mime type, different paths.
+    zimCreator.addItem(std::make_shared<TestItem>("testPath1", "text/html", "Test Article"));
+    zimCreator.addItem(std::make_shared<TestItem>("testPath2", "text/html", "Test Article"));
+
+    zimCreator.addMetadata("Title", "Test zim");
+    zimCreator.finishZimCreation();
+
+    zim::Archive archive(tempZim.getPath());
+    const auto suggestions = getSuggestions(archive, "Test Article", archive.getEntryCount());
+
+    // One suggestion per article.
+    const std::vector<std::string> titleTwice = {
+      "Test Article",
+      "Test Article"
+    };
+    ASSERT_EQ(suggestions, titleTwice);
+  }
+
+  // Titles which begin with the search string should have higher relevance
+  TEST(Suggestion, anchorQueryToBeginning) {
+    std::vector<std::string> titles = {
+                                        "aterm bterm this is a title cterm",
+                                        "this is a title aterm bterm cterm",
+                                        "aterm this is a title bterm cterm"
+                                      };
+
+    TempZimArchive tza("testZim");
+    const zim::Archive archive = tza.createZimFromTitles(titles);
+
+    std::vector<std::string> resultSet = getSuggestions(archive, "This is a title", archive.getEntryCount());
+    std::vector<std::string> expectedResult = {
+                                                "this is a title aterm bterm cterm",
+                                                "aterm bterm this is a title cterm",
+                                                "aterm this is a title bterm cterm"
+                                              };
+
+    ASSERT_EQ(expectedResult, resultSet);
+  }
+
+  // To secure compatibility of new zim files with older kiwixes, we need to index
+  // the full path of the entries as data of documents.
+  TEST(Suggestion, indexFullPath) {
+    TempZimArchive tza("testZim");
+    zim::writer::Creator creator;
+    creator.configIndexing(true, "en");
+    creator.startZimCreation(tza.getPath());
+
+    auto item = std::make_shared<TestItem>("testPath", "text/html", "Test Article");
+    creator.addItem(item);
+
+    creator.addMetadata("Title", "Test zim");
+    creator.finishZimCreation();
+
+    zim::Archive archive(tza.getPath());
+
+    zim::SuggestionSearcher suggestionSearcher(archive);
+    auto suggestionSearch = suggestionSearcher.suggest("Test Article");
+    auto result = suggestionSearch.getResults(0, archive.getEntryCount());
+
+    ASSERT_EQ(result.begin()->getPath(), "testPath");  // the entry API reports the path exactly as given to the item
+    ASSERT_EQ(result.begin().getDbData().substr(0, 2), "C/");  // db data is prefixed, presumably namespace 'C' + '/' — TODO confirm
+  }
+
+  TEST(Suggestion, nonWordCharacters) {
+    TempZimArchive tza("testZim");
+    {
+      const zim::Archive archive = tza.createZimFromTitles({
+        "Alice Bob",
+        "Bonnie + Clyde",
+        "Jack & Jill, on the hill"
+      });
+
+      EXPECT_SUGGESTION_RESULTS(archive, "Alice & Bob",
+        "Alice Bob"
+      );
+
+      EXPECT_SUGGESTION_RESULTS(archive, "Bonnie + Clyde",
+        "Bonnie + Clyde"
+      );
+
+      EXPECT_SUGGESTION_RESULTS(archive, "Jack & Jill",
+        "Jack & Jill, on the hill"
+      );
+    }
+  }
+
+  TEST(Suggestion, titleSnippet) {
+    TempZimArchive tza("testzim");
+
+    const zim::Archive archive = tza.createZimFromTitles({
+      "this is a straight run of matching words",
+      "this is a broken set of likely words",
+      "this is a long title to ensure that the snippets generated contain the entire title even if match is one word"
+    });
+
+    EXPECT_SNIPPET_EQ(
+      archive,
+      1,
+      "straight run of matching",
+      {
+        "this is a <b>straight</b> <b>run</b> <b>of</b> <b>matching</b> words"
+      }
+    );
+
+    EXPECT_SNIPPET_EQ(
+      archive,
+      1,
+      "broken likely",
+      {
+        "this is a <b>broken</b> set of <b>likely</b> words"
+      }
+    );
+
+    EXPECT_SNIPPET_EQ(
+      archive,
+      1,
+      "generated",
+      {
+        "this is a long title to ensure that the snippets <b>generated</b> contain the entire title even if match is one word"
+      }
+    );
+
+    EXPECT_SNIPPET_EQ(
+      archive,
+      archive.getEntryCount(),
+      "this is",
+      {
+        "<b>this</b> <b>is</b> a broken set of likely words",
+        "<b>this</b> <b>is</b> a straight run of matching words",
+        "<b>this</b> <b>is</b> a long title to ensure that the snippets generated contain the entire title even if match <b>is</b> one word"
+      }
+    );
+  }
+
+  TEST(Suggestion, reuseSearcher) {
+    std::vector<std::string> titles = {
+                                        "song for you",
+                                        "sing a song for you",
+                                        "a song b for c you",
+                                        "song for someone"
+                                      };
+
+    TempZimArchive tza("testZim");
+    const zim::Archive archive = tza.createZimFromTitles(titles);
+
+    zim::SuggestionSearcher suggestionSearcher(archive);
+    suggestionSearcher.setVerbose(true);
+    auto suggestionSearch1 = suggestionSearcher.suggest("song for you");
+    auto suggestionResult1 = suggestionSearch1.getResults(0, 2);  // presumably (offset, max count): the first two hits
+
+    int count = 0;  // total hits seen across both searches
+    for (auto entry : suggestionResult1) {
+      count++;
+    }
+
+    auto suggestionSearch2 = suggestionSearcher.suggest("song for you");  // same searcher, second query
+    auto suggestionResult2 = suggestionSearch2.getResults(2, archive.getEntryCount());  // the remaining hits
+
+    for (auto entry : suggestionResult2) {
+      count++;
+    }
+    ASSERT_EQ(count, 3);  // 3 of the 4 titles are expected to match; presumably all but "song for someone"
+  }
+}
diff --git a/test/suggestion_iterator.cpp b/test/suggestion_iterator.cpp
new file mode 100644
index 0000000..f82a93f
--- /dev/null
+++ b/test/suggestion_iterator.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2021 Maneesh P M <manu.pm55@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/suggestion.h>
+#include <zim/suggestion_iterator.h>
+#include <zim/error.h>
+#include "tools.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+using zim::unittests::TempZimArchive;
+
+#if defined(ENABLE_XAPIAN)
+
+TEST(suggestion_iterator, end) {
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article 1", "item a"}
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+  auto search = searcher.suggest("item");
+  auto result = search.getResults(0, archive.getEntryCount());
+
+  auto it = result.end();
+
+  ASSERT_THROW(it.getEntry(), std::runtime_error);
+  ASSERT_THROW(*it, std::runtime_error);
+}
+
+TEST(suggestion_iterator, copy) {
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article 1", "item a"}
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+  auto search = searcher.suggest("article");
+  auto result = search.getResults(0, archive.getEntryCount());
+
+  auto it = result.begin();
+
+  auto it2 = it;
+  ASSERT_EQ(it->getTitle(), it2->getTitle());
+
+  it = result.end();
+  it2 = it;
+  ASSERT_EQ(it, it2);
+  ASSERT_THROW(it->getTitle(), std::runtime_error);
+  ASSERT_THROW(it2->getTitle(), std::runtime_error);
+}
+
+TEST(suggestion_iterator, functions) {
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article 1", "item a"}
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+  auto search = searcher.suggest("article");
+  auto result = search.getResults(0, archive.getEntryCount());
+
+  auto it = result.begin();
+
+  // Test functions
+  ASSERT_EQ(it->getTitle(), "article 1");
+  ASSERT_EQ(it->getPath(), "dummyPatharticle 1");
+
+  auto entry = it.getEntry();
+  ASSERT_EQ(entry.getTitle(), "article 1");
+}
+
+TEST(suggestion_iterator, iteration) {
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article a", "item a"},
+    {"article b", "item b"}
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+  auto search = searcher.suggest("article");
+  auto result = search.getResults(0, archive.getEntryCount());
+  auto it1 = result.begin();
+
+  zim::SuggestionIterator it = it1;
+  ASSERT_EQ(it->getTitle(), result.begin()->getTitle());
+
+  ASSERT_EQ(it->getTitle(), "article a");
+  it++;
+  ASSERT_EQ(it->getTitle(), "article b");
+  ASSERT_TRUE(it != it1);
+  ASSERT_FALSE(it == it1);
+
+  it--;
+  ASSERT_EQ(it->getTitle(), "article a");
+  ASSERT_TRUE(result.begin() == it);
+  it++; it++;
+  ASSERT_TRUE(it == result.end());
+}
+
+#endif  // ENABLE_XAPIAN
+
+TEST(suggestion_iterator, rangeBased) {  // built with or without Xapian: exercises the title-range fallback
+  TempZimArchive tza("testZim");
+
+  zim::Archive archive = tza.createZimFromContent({
+    {"article a", "item a"},
+    {"article b", "item b"},
+    {"random c", "random c"}
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+  auto search = searcher.suggest("article");
+
+#if defined(ENABLE_XAPIAN)
+  search.forceRangeSuggestion();    // Close xapian db to force rangeBased search
+#endif  // ENABLE_XAPIAN
+
+  ASSERT_EQ(search.getEstimatedMatches(), 2);  // "article a" and "article b"
+  auto srs = search.getResults(0, archive.getEntryCount());
+  ASSERT_EQ(srs.size(), 2);
+
+  auto it1 = srs.begin();
+  ASSERT_EQ(it1->getTitle(), "article a");
+  ASSERT_EQ(it1.getEntry().getPath(), "dummyPatharticle a");
+
+  auto suggestionItem = *it1;
+  ASSERT_FALSE(suggestionItem.hasSnippet());  // range-based results carry no snippet
+  ASSERT_EQ(suggestionItem.getTitle(), "article a");
+
+  zim::SuggestionIterator it2 = it1;
+  ASSERT_EQ(it1->getTitle(), it2->getTitle());
+
+  it1++;
+  ASSERT_EQ(it1->getTitle(), "article b");
+  it1--;
+  ASSERT_EQ(it1->getTitle(), "article a");
+
+  it2 = it1;
+  ASSERT_TRUE(it2 == it1);
+
+  it2 = srs.end();
+  ASSERT_EQ(it2->getTitle(), "random c");  // NOTE(review): dereferences end(); relies on it designating the next title-ordered entry — confirm intended
+}
+
+#if defined(ENABLE_XAPIAN)
+TEST(search_iterator, stemmedSearch) {  // NOTE(review): suite is "search_iterator" but this file tests suggestion_iterator — confirm
+  TempZimArchive tza("testZim");
+
+  // The following stemming occurs
+  // apple -> appl
+  // charlie -> charli
+  // chocolate -> chocol
+  // factory -> factori
+  zim::Archive archive = tza.createZimFromTitles({
+    "an apple a day, keeps the doctor away",
+    "charlie and the chocolate factory"
+  });
+
+  zim::SuggestionSearcher searcher(archive);
+
+  auto search = searcher.suggest("apples");  // plural query; the snippet below highlights "apple"
+  auto result = search.getResults(0, 1);
+  ASSERT_EQ(result.begin()->getSnippet(), "an <b>apple</b> a day, keeps the doctor away");
+
+  search = searcher.suggest("chocolate factory");
+  result = search.getResults(0, 1);
+  ASSERT_EQ(result.begin()->getSnippet(), "charlie and the <b>chocolate</b> <b>factory</b>");
+
+  // Test stemming with reused searcher
+  search = searcher.suggest("apples");
+  result = search.getResults(0, 1);
+  ASSERT_EQ(result.begin()->getSnippet(), "an <b>apple</b> a day, keeps the doctor away");
+}
+#endif  // ENABLE_XAPIAN
+
+} // anonymous namespace
diff --git a/test/template.cpp b/test/template.cpp
new file mode 100644
index 0000000..f0a606e
--- /dev/null
+++ b/test/template.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2009 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "../src/template.h"
+
+#include "gtest/gtest.h"
+
+namespace
+{
+// Fixture that records every TemplateParser event into `result` as text.
+class TemplateTest : public ::testing::Test, private zim::TemplateParser::Event
+{
+ public:
+  std::string result;
+  zim::TemplateParser parser;
+  TemplateTest() : parser(this) {}
+
+ private:
+  // Plain data is appended verbatim.
+  void onData(const std::string& data) override { result += data; }
+
+  void onToken(const std::string& token) override
+  {
+    result += "T(";
+    result += token;
+    result += ')';
+  }
+
+  void onLink(char ns, const std::string& title) override
+  {
+    result += "L(";
+    result += ns;
+    result += ", ";
+    result += title;
+    result += ')';
+  }
+};
+
+TEST_F(TemplateTest, ZeroTemplate)
+{
+  parser.parse("<html><body><h1>Hi</h1></body></html>");
+  parser.flush();
+
+  ASSERT_EQ(result, "<html><body><h1>Hi</h1></body></html>");
+}
+
+TEST_F(TemplateTest, Token)
+{
+  parser.parse("<html><%content%></html>");
+  parser.flush();
+
+  ASSERT_EQ(result, "<html>T(content)</html>");
+}
+
+TEST_F(TemplateTest, Link)
+{
+  parser.parse("<html><%/A/Article%></html>");
+  parser.flush();
+
+  ASSERT_EQ(result, "<html>L(A, Article)</html>");
+}
+
+}  // namespace
diff --git a/test/tinyString.cpp b/test/tinyString.cpp
new file mode 100644
index 0000000..5ef6ce6
--- /dev/null
+++ b/test/tinyString.cpp
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "gtest/gtest.h"
+
+#include "../src/writer/tinyString.h"
+
+using namespace zim::writer;
+
+namespace
+{
+
+TEST(TinyStringTest, empty)
+{
+  TinyString s;
+  ASSERT_TRUE(s.empty());
+  ASSERT_EQ(s.size(), 0);
+  ASSERT_EQ((std::string)s, "");
+  ASSERT_EQ(s, TinyString());
+}
+
+TEST(TinyStringTest, noChar)
+{
+  TinyString s("");
+  ASSERT_TRUE(s.empty());
+  ASSERT_EQ(s.size(), 0);
+  ASSERT_EQ((std::string)s, "");
+  ASSERT_EQ(s, TinyString());
+}
+
+TEST(TinyStringTest, oneChar)
+{
+  TinyString s("A");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 1);
+  ASSERT_EQ((std::string)s, "A");
+  ASSERT_TRUE(s < TinyString("B"));
+  ASSERT_EQ(s, TinyString("A"));
+  ASSERT_FALSE(s == TinyString("B"));
+}
+
+TEST(TinyStringTest, chars)
+{
+  TinyString s("ABCDE");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 5);
+  ASSERT_EQ((std::string)s, "ABCDE");
+  ASSERT_FALSE(s < TinyString());
+  ASSERT_FALSE(s < TinyString(""));
+  ASSERT_FALSE(s < TinyString("A"));
+  ASSERT_FALSE(s < TinyString("ABCD"));
+  ASSERT_FALSE(s < TinyString("AACDE"));
+  ASSERT_TRUE(TinyString() < s);
+  ASSERT_TRUE(TinyString("") < s);
+  ASSERT_TRUE(TinyString("A") < s);
+  ASSERT_TRUE(TinyString("ABCD") < s);
+  ASSERT_TRUE(TinyString("AACDE") < s);
+  ASSERT_TRUE(s == s);
+  ASSERT_FALSE(s < s);
+}
+
+TEST(PathTitleTinyString, none)
+{
+  PathTitleTinyString s;
+  ASSERT_TRUE(s.empty());
+  ASSERT_EQ(s.size(), 0);
+  ASSERT_EQ((std::string)s, "");
+  ASSERT_EQ(s, TinyString());
+  ASSERT_EQ(s.getPath(), "");
+  ASSERT_EQ(s.getTitle(false), "");
+  ASSERT_EQ(s.getTitle(true), "");
+}
+
+TEST(PathTitleTinyString, empty)
+{
+  //We have the separator between path and title
+  PathTitleTinyString s("", "");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 1);
+  ASSERT_EQ((std::string)s, std::string("", 1));
+  ASSERT_EQ(s.getPath(), "");
+  ASSERT_EQ(s.getTitle(false), "");
+  ASSERT_EQ(s.getTitle(true), "");
+}
+
+TEST(PathTitleTinyString, no_title)
+{
+  // With an empty title only the path and the path/title separator ('\0') are stored
+  PathTitleTinyString s("FOO", "");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 4);
+  ASSERT_EQ((std::string)s, std::string("FOO\0", 4));
+  ASSERT_EQ(s.getPath(), "FOO");
+  ASSERT_EQ(s.getTitle(false), "FOO");  // yields the path when no title is stored
+  ASSERT_EQ(s.getTitle(true), "");  // yields the stored (empty) title
+}
+
+TEST(PathTitleTinyString, no_path)
+{
+  //We have the separator between path and title
+  PathTitleTinyString s("", "BAR");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 4);
+  ASSERT_EQ((std::string)s, std::string("\0BAR", 4));
+  ASSERT_EQ(s.getPath(), "");
+  ASSERT_EQ(s.getTitle(false), "BAR");
+  ASSERT_EQ(s.getTitle(true), "BAR");
+}
+
+TEST(PathTitleTinyString, path_title)
+{
+  //We have the separator between path and title
+  PathTitleTinyString s("FOO", "BAR");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 7);
+  ASSERT_EQ((std::string)s, std::string("FOO\0BAR", 7));
+  ASSERT_EQ(s.getPath(), "FOO");
+  ASSERT_EQ(s.getTitle(false), "BAR");
+  ASSERT_EQ(s.getTitle(true), "BAR");
+}
+
+TEST(PathTitleTinyString, equal_path_title)
+{
+  // A title equal to the path is not stored: only the path and the '\0' separator remain
+  PathTitleTinyString s("FOO", "FOO");
+  ASSERT_FALSE(s.empty());
+  ASSERT_EQ(s.size(), 4);
+  ASSERT_EQ((std::string)s, std::string("FOO\0", 4));
+  ASSERT_EQ(s.getPath(), "FOO");
+  ASSERT_EQ(s.getTitle(false), "FOO");  // yields the path, which equals the title here
+  ASSERT_EQ(s.getTitle(true), "");  // the stored title is empty
+}
+}  // namespace
diff --git a/test/tools.cpp b/test/tools.cpp
new file mode 100644
index 0000000..0ad3026
--- /dev/null
+++ b/test/tools.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "tools.h"
+
+#ifdef _WIN32
+#include <locale>
+#include <codecvt>
+#include <windows.h>
+#include <fileapi.h>
+#include <io.h>
+#else
+#include <dirent.h>
+#endif
+
+#include "../src/fs.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "gtest/gtest.h"
+
+namespace zim
+{
+
+namespace unittests
+{
+
+// Creates an empty, uniquely named temp file; on POSIX throws std::runtime_error if that fails.
+TempFile::TempFile(const char* name) : fd_(-1)
+{
+#ifdef _WIN32
+  std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utfConv;
+  wchar_t cbase[MAX_PATH];
+  const std::wstring wname = utfConv.from_bytes(name);
+  GetTempPathW(MAX_PATH-(wname.size()+2), cbase);
+  // This creates an empty file; we just have to open it later
+  GetTempFileNameW(cbase, wname.c_str(), 0, wpath_);
+  path_ = utfConv.to_bytes(wpath_);
+#else
+  const char* const TMPDIR = std::getenv("TMPDIR");
+  path_ = std::string(TMPDIR ? TMPDIR : "/tmp") + "/" + name + "_XXXXXX";
+  const int tmp_fd = mkstemp(&path_[0]);  // replaces the XXXXXX suffix in place
+  if (tmp_fd == -1) throw std::runtime_error("Cannot create temporary file " + path_);
+  ::close(tmp_fd);  // fd() reopens the file lazily
+#endif
+}
+
+TempFile::~TempFile()
+{
+  close();  // release the descriptor (if any) before deleting the file
+#ifdef _WIN32
+  DeleteFileW(wpath_);
+#else
+  unlink(path_.c_str());  // result ignored
+#endif
+}
+
+int TempFile::fd()
+{
+  if (fd_ == -1) {  // no descriptor yet (or it was closed): open lazily
+#ifdef _WIN32
+    fd_ = _wopen(wpath_, _O_RDWR | _O_BINARY);
+#else
+    fd_ = open(path_.c_str(), O_RDWR);
+#endif
+  }
+  return fd_;  // still -1 if opening failed
+}
+
+// Closes the descriptor if one is open and marks it as closed again.
+void TempFile::close()
+{
+  if (fd_ == -1) return;
+  ::close(fd_);
+  fd_ = -1;
+}
+
+std::unique_ptr<TempFile>
+makeTempFile(const char* name, const std::string& content)  // new temp file pre-filled with `content`
+{
+  std::unique_ptr<TempFile> p(new TempFile(name));
+  write(p->fd(), &content[0], content.size());  // NOTE(review): result ignored; a failed or short write goes unnoticed
+  p->close();  // descriptor is closed here; fd() reopens it on demand
+  return p;
+}
+
+void setDataDir(std::string& dataDir)
+{
+  // FAIL() must be used in a void function, hence the out parameter instead of a return value.
+  const char* cDataDir = std::getenv("ZIM_TEST_DATA_DIR");
+  if (cDataDir == NULL) {
+    dataDir = "INVALID_DATA_DIR";  // placeholder left in dataDir when the variable is unset
+    FAIL() << "ZIM_TEST_DATA_DIR is not defined. You must define it to the directory containing test zim files.";
+  }  // FAIL() is a fatal failure that returns from this function, so the line below is skipped
+  dataDir = cDataDir;
+}
+
+TestFile::TestFile(const std::string& dataDir, const std::string& category, const std::string& filename) :
+  filename(filename),
+  category(category),
+  path(zim::DEFAULTFS::join(zim::DEFAULTFS::join(dataDir, category), filename))
+{
+}
+
+const std::vector<TestFile> getDataFilePath(const std::string& filename, const std::string& category)
+{
+  std::vector<TestFile> filePaths;  // one TestFile per category to look in
+  std::string dataDirPath;
+  setDataDir(dataDirPath);
+
+  if (!category.empty()) {
+      // A particular category was asked for: return that single candidate.
+      filePaths.emplace_back(dataDirPath, category, filename);
+  } else {
+#ifdef _WIN32
+    // We don't have dirent.h on Windows.
+    // If we move the test data out of the repository, we will need a way to discover the data.
+    // Use a static list of categories for now.
+    for (auto& category: {"withns", "nons"}) {
+      filePaths.emplace_back(dataDirPath, category, filename);
+    }
+#else
+    auto dataDir = opendir(dataDirPath.c_str());
+
+    if (!dataDir) {  // data directory cannot be opened: return one placeholder entry
+      filePaths.emplace_back(dataDirPath, "NO_DATA_DIR", filename);
+      return filePaths;
+    }
+    struct dirent* current = NULL;
+    while((current = readdir(dataDir))) {  // every remaining directory entry is used as a category name
+      if (current->d_name[0] == '.' || current->d_name[0] == '_') {  // skip names starting with '.' or '_'
+        continue;
+      }
+      filePaths.emplace_back(dataDirPath, current->d_name, filename);
+    }
+    closedir(dataDir);
+#endif
+  }
+
+  return filePaths;
+}
+
+// Builds a ZIM file at this temp path with one dummy HTML item per title, then opens it.
+zim::Archive TempZimArchive::createZimFromTitles(std::vector<std::string> titles) {
+  zim::writer::Creator creator;
+  creator.configIndexing(true, "en");
+  creator.startZimCreation(this->path());
+
+  // Each item gets the path "dummyPath" + title; bind by reference to avoid copying.
+  for (const auto& title : titles) {
+    std::string path = "dummyPath" + title;
+    auto item = std::make_shared<TestItem>(path, "text/html", title);
+    creator.addItem(item);
+  }
+
+  creator.addMetadata("Title", "This is a title");
+  creator.finishZimCreation();
+  return zim::Archive(this->path());
+}
+
+// Builds a ZIM file at this temp path from {title, content} rows, then opens it.
+zim::Archive TempZimArchive::createZimFromContent(std::vector<std::vector<std::string>> contents) {
+  zim::writer::Creator creator;
+  creator.configIndexing(true, "en");
+  creator.startZimCreation(this->path());
+
+  // Row[0] is the title, row[1] the body; at() throws on short rows instead of reading out of bounds.
+  for (const auto& content : contents) {
+    std::string path = "dummyPath" + content.at(0);
+    auto item = std::make_shared<TestItem>(path, "text/html", content.at(0), content.at(1));
+    creator.addItem(item);
+  }
+
+  creator.addMetadata("Title", "This is a title");
+  creator.finishZimCreation();
+  return zim::Archive(this->path());
+}
+
+const std::string TempZimArchive::getPath() {
+  return this->path();
+}
+
+} // namespace unittests
+
+} // namespace zim
diff --git a/test/tools.h b/test/tools.h
new file mode 100644
index 0000000..f5aa2eb
--- /dev/null
+++ b/test/tools.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2020 Veloman Yunkan
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#ifndef ZIM_TEST_TOOLS_H
+#define ZIM_TEST_TOOLS_H
+
+
+#include <string>
+#include <vector>
+#include <sys/types.h>
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#define LSEEK _lseeki64
+#else
+#include <unistd.h>
+#define LSEEK lseek
+#endif
+
+#include "../src/buffer.h"
+#include <limits.h>
+
+#define ZIM_PRIVATE
+#include <zim/archive.h>
+#include <zim/search.h>
+#include <zim/writer/creator.h>
+#include <zim/writer/item.h>
+#include <zim/writer/contentProvider.h>
+
+namespace zim
+{
+
+namespace unittests
+{
+
+// TempFile is a utility class for working with temporary files in RAII fashion:
+//
+//   1. An empty temporary file is created (in the temporary file directory)
+//      by the constructor.
+//
+//   2. The file can be filled with data via the file descriptor (returned
+//      by the fd() member function).
+//
+//      -------------------------------------------------------------
+//      | IMPORTANT!                                                |
+//      |                                                           |
+//      | The file descriptor must NOT be close()-ed. Under Windows |
+//      | this will result in the file being removed.               |
+//      -------------------------------------------------------------
+//
+//   3. The destructor automatically (closes and) removes the file
+//
+class TempFile
+{
+  int fd_;  // -1 while no descriptor is open
+  std::string path_;
+#ifdef _WIN32
+  wchar_t wpath_[MAX_PATH];  // wide-character form of the path, used by the Windows calls
+#endif
+public:
+  // Creates an empty file in the temporary directory (under Linux and friends
+  // its path is read from the TMPDIR environment variable or defaults to /tmp)
+  explicit TempFile(const char* name);
+
+  TempFile(const TempFile& ) = delete;
+  void operator=(const TempFile& ) = delete;
+
+  // Closes and removes the file
+  ~TempFile();
+
+  // Close the file descriptor if opened
+  void close();
+
+  // File descriptor, opened for reading and writing on first use
+  // Important! It must NOT be close()-ed directly; use close() above
+  int fd();
+
+  // Path of the file as built by the constructor (absolute unless TMPDIR is relative)
+  std::string path() const { return path_; }
+};
+
+template<typename T>  // T: any type that can be streamed with operator<<
+std::string to_string(const T& value)
+{
+  std::ostringstream ss;  // NOTE(review): <sstream> is not included by this header directly — relies on a transitive include
+  ss << value;
+  return ss.str();
+}
+
+std::unique_ptr<TempFile>
+makeTempFile(const char* name, const std::string& content);
+
+
+// Writes `object` (via object.write(fd)) followed by `tail` to a temp file and returns the bytes.
+template<typename T>
+zim::Buffer write_to_buffer(const T& object, const std::string& tail="")
+{
+  TempFile tmpFile("test_temp_file");
+  const auto tmp_fd = tmpFile.fd();
+  object.write(tmp_fd);
+  write(tmp_fd, tail.data(), tail.size());
+  size_type size = LSEEK(tmp_fd, 0, SEEK_END);  // end offset == number of bytes written
+  auto buf = zim::Buffer::makeBuffer(zim::zsize_t(size));
+  LSEEK(tmp_fd, 0, SEEK_SET);
+  char* p = const_cast<char*>(buf.data());
+  while ( size != 0 ) {  // read back in chunks of at most 1 MiB
+    const auto size_to_read = std::min(size, size_type{1024*1024});
+    const auto n = read(tmp_fd, p, size_to_read);
+    if ( n <= 0 )  // error, or EOF before `size` bytes: fail instead of looping forever
+      throw std::runtime_error("Cannot read " + tmpFile.path());
+    p += n;
+    size -= n;
+  }
+  return buf;
+}
+
+struct TestFile {  // one candidate location of a test data file
+  TestFile(const std::string& dataDir, const std::string& category, const std::string& filename);
+  const std::string filename;
+  const std::string category;
+  const std::string path;  // dataDir, category and filename joined by the constructor
+};
+
+const std::vector<TestFile> getDataFilePath(const std::string& filename, const std::string& category = "");
+
+// Helper class to create a temporary zim file and remove it once the test is done
+class TempZimArchive : zim::unittests::TempFile {
+  public:
+    explicit TempZimArchive(const char* tempPath) : zim::unittests::TempFile {tempPath} {}
+    zim::Archive createZimFromTitles(std::vector<std::string> titles);  // one dummy item per title
+    zim::Archive createZimFromContent(std::vector<std::vector<std::string>> contents);  // rows of {title, content}
+    const std::string getPath();  // path of the underlying temporary file
+};
+
+enum class IsFrontArticle {  // selects the hints TestItem::getHints() returns
+  YES,      // FRONT_ARTICLE hint set to 1
+  NO,       // FRONT_ARTICLE hint set to 0
+  DEFAULT   // no hint at all
+};
+
+// In-memory zim::writer::Item used by the tests; content is served from a string.
+class TestItem : public zim::writer::Item {
+  public:
+    TestItem(
+        const std::string& path,
+        const std::string& mimetype = "text/html",
+        const std::string& title = "Test Item",
+        const std::string& content = "foo",
+        IsFrontArticle frontArticle = IsFrontArticle::DEFAULT) :
+      path(path),
+      title(title),
+      content(content),
+      mimetype(mimetype),
+      frontArticle(frontArticle)
+    {}
+    virtual ~TestItem() = default;
+
+    std::string getPath() const override { return path; }
+    std::string getTitle() const override { return title; }
+    std::string getMimeType() const override { return mimetype; }
+    zim::writer::Hints getHints() const override {  // DEFAULT yields no hint at all
+      switch (frontArticle) {
+        case IsFrontArticle::YES:
+          return zim::writer::Hints{{zim::writer::FRONT_ARTICLE, 1}};
+        case IsFrontArticle::NO:
+          return zim::writer::Hints{{zim::writer::FRONT_ARTICLE, 0}};
+        default:
+          return zim::writer::Hints();
+      }
+    }
+    std::unique_ptr<zim::writer::ContentProvider> getContentProvider() const override {
+      return std::unique_ptr<zim::writer::ContentProvider>(new zim::writer::StringProvider(content));
+    }
+
+  std::string path;
+  std::string title;
+  std::string content;
+  std::string mimetype;
+  IsFrontArticle frontArticle;
+};
+
+} // namespace unittests
+
+} // namespace zim
+
+#endif // ZIM_TEST_TOOLS_H
diff --git a/test/tooltesting.cpp b/test/tooltesting.cpp
new file mode 100644
index 0000000..3fa176d
--- /dev/null
+++ b/test/tooltesting.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 Matthieu Gautier <mgautier@kymeria.fr>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include "../src/tools.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+  TEST(Tools, wordCount) {
+    ASSERT_EQ(zim::countWords(""), 0);
+    ASSERT_EQ(zim::countWords("   "), 0);  // whitespace only
+    ASSERT_EQ(zim::countWords("One"), 1);
+    ASSERT_EQ(zim::countWords("One Two Three"), 3);
+    ASSERT_EQ(zim::countWords("  One  "), 1);  // leading/trailing spaces are not counted
+    ASSERT_EQ(zim::countWords("One    Two Three   "), 3);  // runs of spaces count as one separator
+    ASSERT_EQ(zim::countWords("One.Two\tThree"), 2);  // '.' does not split words, a tab does
+  }
+
+
+  TEST(Tools, parseIllustrationPathToSize) {
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_0x0@1"), 0);
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_1x1@1"), 1);
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_01x01@1"), 1);  // leading zeros are accepted
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_64x64@1"), 64);
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_128x128@1"), 128);
+    ASSERT_EQ(zim::parseIllustrationPathToSize("Illustration_1024x1024@1"), 1024);
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illsration_64x64@1"), std::runtime_error);  // misspelled prefix (deliberate)
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illstration_"), std::runtime_error);  // misspelled prefix, no size
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_64x@1"), std::runtime_error);  // missing height
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_64x"), std::runtime_error);  // missing height and scale
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_64x64"), std::runtime_error);  // missing "@1" suffix
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_64x64@1.5"), std::runtime_error);  // scale other than "1"
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_128x64@1"), std::runtime_error);  // width != height
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_-32x-32@1"), std::runtime_error);  // negative size
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_ 64x64@1"), std::runtime_error);  // space before width
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_64x 64@1"), std::runtime_error);  // space before height
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_ 64x 64@1"), std::runtime_error);  // spaces before both
+    ASSERT_THROW(zim::parseIllustrationPathToSize("Illustration_1 28x1 28@1"), std::runtime_error);  // spaces inside the numbers
+  }
+
+}
diff --git a/test/uuid.cpp b/test/uuid.cpp
new file mode 100644
index 0000000..abfdd82
--- /dev/null
+++ b/test/uuid.cpp
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2013 Tommi Maekitalo
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ *
+ */
+
+#include <zim/uuid.h>
+#include <iostream>
+#include <sstream>
+
+#include "gtest/gtest.h"
+#ifdef _WIN32
+# include <windows.h>
+# include <synchapi.h>
+#else
+# include <unistd.h>
+#endif
+
+namespace
+{
+TEST(UuidTest, construct)
+{
+  // Two 16-byte inputs that differ only at byte 7 and byte 15.
+  zim::Uuid first("\x01\x23\x45\x67\x89\xab\xcd\xef\x10\x32\x54\x76\x98\xba\xdc\xfe");
+  zim::Uuid second("\x01\x23\x45\x67\x89\xab\xcd\xe0\x10\x32\x54\x76\x98\xba\xdc\x0e");
+  // They must differ from each other and from a default-constructed Uuid.
+  ASSERT_TRUE(first != second);
+  ASSERT_TRUE(first != zim::Uuid());
+  ASSERT_TRUE(second != zim::Uuid());
+  // Every byte of the first input must be readable back through data[].
+  ASSERT_EQ(first.data[0], '\x01');
+  ASSERT_EQ(first.data[1], '\x23');
+  ASSERT_EQ(first.data[2], '\x45');
+  ASSERT_EQ(first.data[3], '\x67');
+  ASSERT_EQ(first.data[4], '\x89');
+  ASSERT_EQ(first.data[5], '\xab');
+  ASSERT_EQ(first.data[6], '\xcd');
+  ASSERT_EQ(first.data[7], '\xef');
+  ASSERT_EQ(first.data[8], '\x10');
+  ASSERT_EQ(first.data[9], '\x32');
+  ASSERT_EQ(first.data[10], '\x54');
+  ASSERT_EQ(first.data[11], '\x76');
+  ASSERT_EQ(first.data[12], '\x98');
+  ASSERT_EQ(first.data[13], '\xba');
+  ASSERT_EQ(first.data[14], '\xdc');
+  ASSERT_EQ(first.data[15], '\xfe');
+
+  // Likewise for the second input.
+  ASSERT_EQ(second.data[0], '\x01');
+  ASSERT_EQ(second.data[1], '\x23');
+  ASSERT_EQ(second.data[2], '\x45');
+  ASSERT_EQ(second.data[3], '\x67');
+  ASSERT_EQ(second.data[4], '\x89');
+  ASSERT_EQ(second.data[5], '\xab');
+  ASSERT_EQ(second.data[6], '\xcd');
+  ASSERT_EQ(second.data[7], '\xe0');
+  ASSERT_EQ(second.data[8], '\x10');
+  ASSERT_EQ(second.data[9], '\x32');
+  ASSERT_EQ(second.data[10], '\x54');
+  ASSERT_EQ(second.data[11], '\x76');
+  ASSERT_EQ(second.data[12], '\x98');
+  ASSERT_EQ(second.data[13], '\xba');
+  ASSERT_EQ(second.data[14], '\xdc');
+  ASSERT_EQ(second.data[15], '\x0e');
+}
+
+TEST(UuidTest, generate)
+{
+  // Default-constructed Uuids must compare equal to one another.
+  zim::Uuid first, second;
+  ASSERT_TRUE(first == second);
+  ASSERT_TRUE(first == zim::Uuid());
+  ASSERT_TRUE(second == zim::Uuid());
+  // After generating only the first, it must differ from the untouched second.
+  first = zim::Uuid::generate();
+  ASSERT_TRUE(first != second);
+  ASSERT_TRUE(first != zim::Uuid());
+  ASSERT_TRUE(second == zim::Uuid());
+
+  // GNU Mach's clock is not precise, so the time may be identical when
+  // generating the two UUIDs, which would make this test fail. Sleep
+  // for one second between the two generate() calls to force a time
+  // difference. Thanks to Pino Toscano.
+#ifdef _WIN32
+  Sleep(1000);
+#else
+  sleep(1);
+#endif
+
+  second = zim::Uuid::generate();
+  ASSERT_TRUE(first != second);
+  ASSERT_TRUE(first != zim::Uuid());
+  ASSERT_TRUE(second != zim::Uuid());
+}
+
+TEST(UuidTest, output)
+{
+  // Streaming and std::string conversion must both yield the same dashed hex text.
+  const std::string expected = "550e8400-e29b-41d4-a716-446655440000";
+  zim::Uuid sample("\x55\x0e\x84\x00\xe2\x9b\x41\xd4\xa7\x16\x44\x66\x55\x44\x00\x00");
+  std::ostringstream rendered;
+  rendered << sample;
+  ASSERT_EQ(rendered.str(), expected);
+  ASSERT_EQ(static_cast<std::string>(sample), expected);
+}
+};