diff --git a/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d24a552efbdf3012ddcf46f3f969f6dca3268172
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/DurableHNSWCorpus + File IO.swift	
@@ -0,0 +1,141 @@
+//
+//  DurableHNSWCorpus + File IO.swift
+//
+//
+//  Created by Mingchung Xia on 2024-03-12.
+//
+
+import Foundation
+
+// MARK: This extension for saving and loading the dictionary of untokenized documents is currently used as a workaround
+// Loading the memory mapped data using CoreLMDB does not load the untokenized documents (and the other fields) of a DurableHNSWCorpus, so the original data is written to and read from disk separately here
+// Eventually, all the code in this extension should be moved to the HNSWCorpusDataHandler after a general wrapper class for DurableHNSW and EphemeralHNSW is made
+//
+// On-disk layout (raw in-memory bytes, i.e. native endianness and native `Int` width):
+//   [entry count: Int], then per entry:
+//   [key: Int] [document byte count: Int] [document UTF-8 bytes] [vector element count: Int] [vector elements: Double...]
+
+extension DurableHNSWCorpus {
+    /// Saves the dictionary of untokenized documents to `fileName` inside the user's Downloads directory.
+    ///
+    /// Prints a message and does nothing if the Downloads directory cannot be located.
+    func saveDictionaryToDownloads(fileName: String) {
+        guard let downloadsURL = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first else {
+            print("Could not find Downloads directory")
+            return
+        }
+
+        saveDictionaryMemoryMap(url: downloadsURL.appendingPathComponent(fileName))
+    }
+
+    /// Serializes `dictionary` and writes it to `url`, replacing any existing file.
+    ///
+    /// The whole payload is built in memory and written atomically, so a previously saved, longer file
+    /// can never leave stale bytes behind and a failed write never leaves a half-written file.
+    /// Write failures are printed rather than thrown.
+    func saveDictionaryMemoryMap(url: URL) {
+        // Raw in-memory bytes of a fixed-size value (used for keys, counts and lengths)
+        func rawBytes<T>(of value: T) -> Data {
+            withUnsafeBytes(of: value) { Data($0) }
+        }
+
+        var payload = rawBytes(of: dictionary.count)
+
+        for (key, value) in dictionary {
+            payload.append(rawBytes(of: key))
+
+            // Length-prefix the UTF-8 document so the reader knows how many bytes to consume
+            let documentData = Data(value.untokenizedDocument.utf8)
+            payload.append(rawBytes(of: documentData.count))
+            payload.append(documentData)
+
+            // Length-prefix the vector with its element count (not its byte count)
+            payload.append(rawBytes(of: value.vector.count))
+            payload.append(value.vector.withUnsafeBytes { Data($0) })
+        }
+
+        do {
+            try payload.write(to: url, options: .atomic)
+        } catch {
+            print("Error writing dictionary to file: \(error)")
+        }
+    }
+
+    /// Reads a dictionary previously written by `saveDictionaryToDownloads(fileName:)` from the Downloads directory.
+    ///
+    /// Returns an empty dictionary (after printing a message) if the Downloads directory cannot be located.
+    static func readDictionaryFromDownloads(fileName: String, width: Int = 50) -> HNSWDictionary {
+        guard let downloadsURL = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first else {
+            print("Could not find Downloads directory")
+            return [:]
+        }
+
+        return readDictionaryMemoryMap(downloadsURL.appendingPathComponent(fileName), width: width)
+    }
+
+    /// Decodes the file at `url` (as written by `saveDictionaryMemoryMap(url:)`) into an `HNSWDictionary`.
+    ///
+    /// Every read is bounds-checked: an empty, truncated or corrupt file prints an error and yields the
+    /// entries decoded so far instead of trapping. An entry whose document is not valid UTF-8 is skipped,
+    /// but its bytes (including its vector) are still consumed so the following entries stay aligned.
+    ///
+    /// - Parameter width: The number of dimensions of the glove encoding. Currently unused; the vector
+    ///   length stored in the file is authoritative.
+    // TODO: Improve this to not need to take in a width, rather switch between the encoding / encoder
+    static func readDictionaryMemoryMap(_ url: URL, width: Int = 50) -> HNSWDictionary {
+        var dictionary = HNSWDictionary()
+
+        do {
+            let data = try Data(contentsOf: url)
+            var index = 0
+
+            // Reads one fixed-size value at `index` and advances past it; throws if too few bytes remain
+            func readScalar<T>(_ type: T.Type) throws -> T {
+                let size = MemoryLayout<T>.size
+                guard size <= data.count - index else { throw CocoaError(.fileReadCorruptFile) }
+                let value = data.subdata(in: index..<index + size).withUnsafeBytes { $0.load(as: T.self) }
+                index += size
+                return value
+            }
+
+            // Reads a length prefix and checks that `length * unitSize` bytes really follow it
+            func readLength(unitSize: Int) throws -> Int {
+                let length = try readScalar(Int.self)
+                guard length >= 0, length <= (data.count - index) / unitSize else {
+                    throw CocoaError(.fileReadCorruptFile)
+                }
+                return length
+            }
+
+            let count = try readScalar(Int.self)
+            guard count >= 0 else { throw CocoaError(.fileReadCorruptFile) }
+
+            for _ in 0..<count {
+                let key = try readScalar(Int.self)
+
+                let documentLength = try readLength(unitSize: 1)
+                let documentData = data.subdata(in: index..<index + documentLength)
+                index += documentLength
+
+                let vectorLength = try readLength(unitSize: MemoryLayout<Double>.size)
+                var vector = [Double]()
+                vector.reserveCapacity(vectorLength)
+                for _ in 0..<vectorLength {
+                    try vector.append(readScalar(Double.self))
+                }
+
+                // Decode only after the whole entry has been consumed, so a bad string cannot desync the stream
+                guard let document = String(data: documentData, encoding: .utf8) else {
+                    print("Failed to decode string")
+                    continue // Skip this entry on failure
+                }
+
+                dictionary[key] = DocumentVectorPair(untokenizedDocument: document, vector: vector)
+            }
+        } catch {
+            print("Error reading dictionary from file: \(error)")
+        }
+
+        return dictionary
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift b/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift
index 7911ebbf91445c07b333d67ba336615ea75f50f9..88e95ff0437d0f1b62ca074f94a804f22216adfd 100644
--- a/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/HNSW/HNSWCorpusDataHandler.swift	
@@ -84,6 +84,11 @@ extension HNSWCorpusDataHandler {
         }
     }
     
+    /// Placeholder for saving only the untokenized documents dictionary map; the body is currently empty (no-op)
+    func saveDictionaryMemoryMap() {
+        // TODO: Move from DurableHNSW extension once HNSW wrapper is created
+    }
+    
     // TODO: find out how to not rebuild the index
     static func loadMemoryMap(encoder: any SNLPEncoder, typicalNeighborhoodSize: Int = 20, resource: String = "hnsw") -> HNSWCorpus<Double> {
         guard let url = Bundle.module.url(forResource: resource, withExtension: "mmap") else {
@@ -127,4 +132,8 @@ extension HNSWCorpusDataHandler {
         let encoder = ContextFreeEncoder<Scalar>(source: encoding)
         return loadMemoryMap(encoder: encoder, typicalNeighborhoodSize: typicalNeighborhoodSize, resource: resource)
     }
+    
+    static func loadDictionaryMemoryMap() {
+        // TODO: Move from DurableHNSW extension once HNSW wrapper is created (this stub currently does nothing)
+    }
 }
diff --git a/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift b/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift
index cf5afed4c75ba3f9ffa67fe7c93ab4554a2609b7..eb7919a977af196985431d2a83c7ac3dc1b24f04 100644
--- a/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift	
+++ b/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift	
@@ -282,15 +282,18 @@ final class HNSWTests: XCTestCase {
         
         let transaction = try Transaction.begin(.write, in: env)
         
-        var corpus = try DurableHNSWCorpus(
-            encoder: _documentEncoder,
-            namespace: "testbasicqueryexampledurable",
-            in: transaction
-        )
-        
-        for doc in docs {
-            try corpus.addUntokenizedDocument(doc, in: transaction)
-        }
+        /// Saving the memory map to disk
+//        var corpus = try DurableHNSWCorpus(
+//            encoder: _documentEncoder,
+//            namespace: "testbasicqueryexampledurable",
+//            in: transaction
+//        )
+//        
+//        for doc in docs {
+//            try corpus.addUntokenizedDocument(doc, in: transaction)
+//        }
+//        
+//        corpus.saveDictionaryToDownloads(fileName: "dictionary.mmap")
         
         try transaction.commit()
         
@@ -299,6 +302,7 @@ final class HNSWTests: XCTestCase {
             
             let queryVector: [Double] = _documentEncoder.encodeToken(query).map { Double($0) }
             
+            /// Reading the memory map (and dictionary) from disk
             let readTransaction = try Transaction.begin(.write, in: env)
             
             let readCorpus = try DurableHNSWCorpus(
@@ -307,7 +311,8 @@ final class HNSWTests: XCTestCase {
                 in: readTransaction
             )
             
-            readCorpus.dictionary = corpus.getDictionary() // FIXME: don't copy over dictionary
+//            readCorpus.dictionary = corpus.getDictionary() // FIXME: don't copy over dictionary
+            readCorpus.dictionary = DurableHNSWCorpus.readDictionaryFromDownloads(fileName: "dictionary.mmap")
             
             // do not add documents here!