diff --git a/src/main/scala/tech/sourced/gemini/Gemini.scala b/src/main/scala/tech/sourced/gemini/Gemini.scala
index d6cf3059..a04d520a 100644
--- a/src/main/scala/tech/sourced/gemini/Gemini.scala
+++ b/src/main/scala/tech/sourced/gemini/Gemini.scala
@@ -129,6 +129,25 @@ class Gemini(session: SparkSession, log: Slf4jLogger, keyspace: String = Gemini.
     ReportResult(duplicates, similarities)
   }
 
+  def isDBEmpty(session: Session, mode: String): Boolean = {
+    var row = session.execute(s"select count(*) from $keyspace.${tables.docFreq} where id='$mode' limit 1").one()
+    if (row.getLong(0) > 0) {
+      return false
+    }
+
+    row = session.execute(s"select count(*) from $keyspace.${tables.hashtables}_$mode").one()
+    if (row.getLong(0) > 0) {
+      return false
+    }
+
+    true
+  }
+
+  def cleanDB(session: Session, mode: String): Unit = {
+    session.execute(s"delete from $keyspace.${tables.docFreq} where id='$mode'")
+    session.execute(s"truncate table $keyspace.${tables.hashtables}_$mode")
+  }
+
   def applySchema(session: Session): Unit = {
     log.debug("CQL: creating schema")
     Source
diff --git a/src/main/scala/tech/sourced/gemini/cmd/HashSparkApp.scala b/src/main/scala/tech/sourced/gemini/cmd/HashSparkApp.scala
index 180ad45f..b4a87d4b 100644
--- a/src/main/scala/tech/sourced/gemini/cmd/HashSparkApp.scala
+++ b/src/main/scala/tech/sourced/gemini/cmd/HashSparkApp.scala
@@ -25,7 +25,8 @@ case class HashAppConfig(
   docFreqFile: String = "",
   verbose: Boolean = false,
   mode: String = Gemini.fileSimilarityMode,
-  gcsKeyFile: String = ""
+  gcsKeyFile: String = "",
+  replace: Boolean = false
 )
 
 /**
@@ -98,6 +99,9 @@ object HashSparkApp extends App with Logging {
     opt[String]("gcs-keyfile")
       .action((x, c) => c.copy(gcsKeyFile = x))
       .text("path to JSON keyfile for authentication in Google Cloud Storage")
+    opt[Unit]("replace")
+      .action((x, c) => c.copy(replace = true))
+      .text("replace results of previous hashing")
     arg[String]("<path-to-git-repos>")
       .required()
       .action((x, c) => c.copy(reposPath = x))
@@ -136,6 +140,14 @@ object HashSparkApp extends App with Logging {
     log.info("Checking DB schema")
     CassandraConnector(spark.sparkContext).withSessionDo { cassandra =>
       gemini.applySchema(cassandra)
+
+      if (config.replace) {
+        gemini.cleanDB(cassandra, config.mode)
+      } else if (!gemini.isDBEmpty(cassandra, config.mode)) {
+        println("Database keyspace is not empty! Hashing may produce wrong results. " +
+          "Please choose another keyspace or pass the --replace option")
+        System.exit(2)
+      }
     }
 
     gemini.hash(reposPath, config.limit, config.format, config.mode, config.docFreqFile)
diff --git a/src/main/scala/tech/sourced/gemini/cmd/ReportApp.scala b/src/main/scala/tech/sourced/gemini/cmd/ReportApp.scala
index 2d266dd4..0690c9e8 100644
--- a/src/main/scala/tech/sourced/gemini/cmd/ReportApp.scala
+++ b/src/main/scala/tech/sourced/gemini/cmd/ReportApp.scala
@@ -58,8 +58,8 @@ object ReportApp extends App {
       })
       .action((x, c) => c.copy(output = x))
       .text("output format")
-    opt[Boolean]("cassandra")
-      .action((x, c) => c.copy(cassandra = x))
+    opt[Unit]("cassandra")
+      .action((x, c) => c.copy(cassandra = true))
       .text("Enable advanced cql queries for Apache Cassandra database")
 
   }