diff --git a/Sources/SwiftNLP/1. Data Collection/HNSWCorpusDataHandler.swift b/Sources/SwiftNLP/1. Data Collection/HNSWCorpusDataHandler.swift
index be7d814a3ea90f63e900702fa043eca1070ac031..74497a4e6c725e7416713313bf65aa6a4b6209ac 100644
--- a/Sources/SwiftNLP/1. Data Collection/HNSWCorpusDataHandler.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/HNSWCorpusDataHandler.swift	
@@ -9,14 +9,53 @@ import Foundation
 
 final class HNSWCorpusDataHandler<Scalar: BinaryFloatingPoint & Codable> {
     var corpus: HNSWCorpus<Scalar>
+    private var url: URL?
     
-    init(corpus: HNSWCorpus<Scalar>) {
+    init(corpus: HNSWCorpus<Scalar>, resource: String = "hnsw") {
         self.corpus = corpus
+        // TODO: Fix this to work with Bundle.module (bundle resources can be read but not written)
+//        self.url = Bundle.module.url(forResource: resource, withExtension: "mmap")
+        if let downloadsDirectory = FileManager.default.urls(for: .downloadsDirectory, in: .userDomainMask).first {
+            self.url = downloadsDirectory.appendingPathComponent(resource + ".mmap")
+        }
+    }
+    
+    /// Approximates the corpus size in bytes; an exact figure is hard to obtain because every class also depends on other classes.
+    /// Note: MemoryLayout.size(ofValue:) reports only the inline size of each vector value, so this may undercount heap-allocated storage; the vectors are really the only "important" part persisted in the memory map.
+    func getCorpusSize() -> Int {
+//        return heapSize(corpus)
+//        return class_getInstanceSize(type(of: corpus))
+//        return MemoryLayout.size(ofValue: corpus)
+        var size = 0
+        let data = corpus.encodedDocuments.base.vectors
+        for vector in data {
+            size += MemoryLayout.size(ofValue: vector)
+        }
+        return size
     }
+    
+    func getDictionarySize(includeKey: Bool = true) -> Int {
+        var size = 0
+        let data = corpus.getDictionary()
+        for (key, documentVectorPair) in data {
+            if includeKey { size += MemoryLayout.size(ofValue: key) }
+            size += MemoryLayout.size(ofValue: documentVectorPair.untokenizedDocument)
+            size += MemoryLayout.size(ofValue: documentVectorPair.vector)
+        }
+        return size
+    }
+    
+//    private func heapSize(_ obj: AnyObject) -> Int {
+//        return malloc_size(Unmanaged.passUnretained(obj).toOpaque())
+//    }
 }
 
 extension HNSWCorpusDataHandler {
-    func saveMemoryMap(url: URL) {
+    func saveMemoryMap() {
+        guard let url = url else {
+            print("URL to resource not found")
+            return
+        }
         let fileManager = FileManager.default
         if !fileManager.fileExists(atPath: url.path) {
             fileManager.createFile(atPath: url.path, contents: nil, attributes: nil)
@@ -41,8 +80,15 @@ extension HNSWCorpusDataHandler {
     }
     
     // TODO: Change the return from Double to Scalar
-    static func loadMemoryMap(url: URL, encoder: any SNLPEncoder) -> HNSWCorpus<Double> {
-        var loadedCorpus = HNSWCorpus(encoder: encoder)
+    // TODO: Change to encoder parameter (any SNLPEncoder)
+    static func loadMemoryMap(encoding: ContextFreeEncoder<Scalar>.PreComputedEmbeddings, resource: String = "hnsw") -> HNSWCorpus<Double> {
+        let _documentEncoder = ContextFreeEncoder(source: encoding)
+
+        guard let url = Bundle.module.url(forResource: resource, withExtension: "mmap") else {
+            print("URL to resource not found")
+            return HNSWCorpus(encoder: _documentEncoder)
+        }
+        var loadedCorpus = HNSWCorpus(encoder: _documentEncoder)
         
         do {
             let data = try Data(contentsOf: url, options: .alwaysMapped)
diff --git a/Sources/SwiftNLP/Resources/hnsw_testbasicexample.mmap b/Sources/SwiftNLP/Resources/hnsw_testbasicexample.mmap
new file mode 100644
index 0000000000000000000000000000000000000000..d0d92e57876b3db107de88afb6eb3decbb9755a5
Binary files /dev/null and b/Sources/SwiftNLP/Resources/hnsw_testbasicexample.mmap differ
diff --git a/Sources/SwiftNLP/Resources/hnsw_testbiggerexample.mmap b/Sources/SwiftNLP/Resources/hnsw_testbiggerexample.mmap
new file mode 100644
index 0000000000000000000000000000000000000000..4f212d4768d1504de7684d8e4fc27cae3d1e9db3
Binary files /dev/null and b/Sources/SwiftNLP/Resources/hnsw_testbiggerexample.mmap differ
diff --git a/Sources/SwiftNLP/Resources/hnsw_testsubreddit.mmap b/Sources/SwiftNLP/Resources/hnsw_testsubreddit.mmap
new file mode 100644
index 0000000000000000000000000000000000000000..48d76cb321a78f73ba95e222efcb553469df352f
Binary files /dev/null and b/Sources/SwiftNLP/Resources/hnsw_testsubreddit.mmap differ
diff --git a/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift b/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift
index 2b364da02ceafcec2e15edc031c573213a3e1cb4..be33da78119d3fed2d4543105264896c6f08f094 100644
--- a/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift	
+++ b/Tests/SwiftNLPTests/2. Encoding/HNSWTests.swift	
@@ -17,8 +17,13 @@ final class HNSWTests: XCTestCase {
         var corpus = HNSWCorpus(encoding: .glove6B50d)
         corpus.addUntokenizedDocuments(docs)
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let dataHandler = HNSWCorpusDataHandler(corpus: corpus, resource: "hnsw_testbasicexample")
+        let corpusSize = dataHandler.getCorpusSize()
+        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+        print("Corpus size: \(corpusSize) bytes")
+        print("Dictionary size: \(dictionarySize) bytes")
+        dataHandler.saveMemoryMap()
+        // let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testbasicexample")
         
         XCTAssert(corpus.count == 3)
         
@@ -57,8 +62,13 @@ final class HNSWTests: XCTestCase {
         var corpus = HNSWCorpus(encoding: .glove6B50d)
         corpus.addUntokenizedDocuments(twentyQuotes)
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let dataHandler = HNSWCorpusDataHandler(corpus: corpus, resource: "hnsw_testbiggerexample")
+        let corpusSize = dataHandler.getCorpusSize()
+        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+        print("Corpus size: \(corpusSize) bytes")
+        print("Dictionary size: \(dictionarySize) bytes")
+        dataHandler.saveMemoryMap()
+//        let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testbiggerexample")
         
         XCTAssertEqual(corpus.count, 20)
         
@@ -87,8 +97,12 @@ final class HNSWTests: XCTestCase {
             }
         }
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let dataHandler = HNSWCorpusDataHandler(corpus: corpus, resource: "hnsw_testsubreddit")
+        let corpusSize = dataHandler.getCorpusSize()
+        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+        print("Corpus size: \(corpusSize) bytes")
+        print("Dictionary size: \(dictionarySize) bytes")
+        dataHandler.saveMemoryMap()
         
         //print("Loaded \(corpus.count) documents.")
         XCTAssert(corpus.count == 17999)
@@ -157,8 +171,11 @@ final class HNSWTests: XCTestCase {
         var corpus = HNSWCorpus(encoder: _documentEncoder)
         corpus.addUntokenizedDocuments(docs)
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let dataHandler = HNSWCorpusDataHandler(corpus: corpus)
+        let corpusSize = dataHandler.getCorpusSize()
+        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+        print("Corpus size: \(corpusSize) bytes")
+        print("Dictionary size: \(dictionarySize) bytes")
         
         do {
             print("Attempting to query corpus.encodedDocuments.find()...")
@@ -204,11 +221,16 @@ final class HNSWTests: XCTestCase {
         let query = "I love Albert Einstein!"
         
         let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
-        var corpus = HNSWCorpus(encoder: _documentEncoder)
-        corpus.addUntokenizedDocuments(docs)
+//        var corpus = HNSWCorpus(encoder: _documentEncoder)
+//        corpus.addUntokenizedDocuments(docs)
+//        
+//        let dataHandler = HNSWCorpusDataHandler(corpus: corpus)
+//        let corpusSize = dataHandler.getCorpusSize()
+//        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+//        print("Corpus size: \(corpusSize) bytes")
+//        print("Dictionary size: \(dictionarySize) bytes")
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testbiggerexample")
         
         do {
             print("Attempting to query corpus.encodedDocuments.find()...")
@@ -229,28 +251,34 @@ final class HNSWTests: XCTestCase {
     // TODO: Get HNSWCorpus from memory map
     func testSubredditQueryExample() async throws {
         
-        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
-            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
-        }
-        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
-            fatalError("Failed to load waterloo_submissions.zst from test bundle.")
-        }
-        
-        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
-        
+//        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+//            fatalError("Failed to find Guelph_submissions.zst in test bundle.")
+//        }
+//        guard let submissionsData = try? Data(contentsOf: submissionsURL) else {
+//            fatalError("Failed to load Guelph_submissions.zst from test bundle.")
+//        }
+//        
+//        let (submissions, _ ): ([Submission],[Data]) = try await loadFromRedditArchive(submissionsData)
+//        
         let _documentEncoder = ContextFreeEncoder<Double>(source: .glove6B50d)
-        var corpus = HNSWCorpus(encoder: _documentEncoder)
-        
-        for submission in submissions {
-            if let text = submission.selftext {
-                corpus.addUntokenizedDocument(text)
-            }
-        }
+//        var corpus = HNSWCorpus(encoder: _documentEncoder)
+//        
+//        for submission in submissions {
+//            if let text = submission.selftext {
+//                corpus.addUntokenizedDocument(text)
+//            }
+//        }
+        let corpus = HNSWCorpusDataHandler<Double>.loadMemoryMap(encoding: .glove6B50d, resource: "hnsw_testsubreddit")
         
         let query = "Mr. Goose is a very important figure at the University of Waterloo."
         
-        let size = MemoryLayout.size(ofValue: corpus)
-        print("Approximate memory footprint: \(size) bytes")
+        let dataHandler = HNSWCorpusDataHandler(corpus: corpus)
+        let corpusSize = dataHandler.getCorpusSize()
+        let dictionarySize = dataHandler.getDictionarySize(includeKey: false)
+        print("Corpus size: \(corpusSize) bytes")
+        print("Dictionary size: \(dictionarySize) bytes")
+        
+        // Load from memory map here
         
         do {
             print("Attempting to query corpus.encodedDocuments.find()...")