diff --git a/Package.resolved b/Package.resolved
index bcefeeef49d54251591a011e0fd0daeece9acc71..6f78ef2766d1092e033b37f88fd34df1b29c8d42 100644
--- a/Package.resolved
+++ b/Package.resolved
@@ -23,8 +23,8 @@
       "kind" : "remoteSourceControl",
       "location" : "https://github.com/L1MeN9Yu/Elva",
       "state" : {
-        "revision" : "9cd193dd5f0df430726256ab5d6aa08455f5d39b",
-        "version" : "2.1.3"
+        "revision" : "c95092d9b5c7f39ea0aaf391a8df3cced4168b73",
+        "version" : "2.2.0"
       }
     },
     {
diff --git a/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileReadStream.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileReadStream.swift
new file mode 100644
index 0000000000000000000000000000000000000000..0639a065f1a8eecfbea2f1f2bb3e33327ac3b1c9
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileReadStream.swift	
@@ -0,0 +1,53 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+import ZSTD
+
+/**
+            A memory mapped version of the FileReadStream -- not clear to me whether this will be useful, just for experimenting
+ */
+public class MMapFileReadStream {
+
+    private let data: Data
+    private var currentIndex = 0
+    
+    public init(path: String) throws {
+        let url = URL(fileURLWithPath: path)
+        self.data = try Data(contentsOf: url, options: .alwaysMapped)
+    }
+}
+
+extension MMapFileReadStream: ReadableStream {
+    public func read(_ buffer: UnsafeMutablePointer<UInt8>, length: Int) -> Int {
+        let bytesToRead = min(length, data.count - currentIndex)
+        data.copyBytes(to: buffer, from: currentIndex..<currentIndex + bytesToRead)
+        currentIndex += bytesToRead
+        return bytesToRead
+    }
+}
+
+extension MMapFileReadStream: ByteStream {
+    public func close() {
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileWriteStream.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileWriteStream.swift
new file mode 100644
index 0000000000000000000000000000000000000000..99e829d4145e37ed7540c82dbeeeb708bff6ad3a
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapFileWriteStream.swift	
@@ -0,0 +1,46 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+import ZSTD
+
+public class MMapFileWriteStream {
+    private var data: Data
+    
+    public init(path: String) throws {
+        let url = URL(fileURLWithPath: path)
+        self.data = try Data(contentsOf: url, options: .alwaysMapped)
+    }
+}
+
+public extension MMapFileWriteStream {
+    func close() {
+    }
+}
+
+extension MMapFileWriteStream: WriteableStream {
+    public func write(_ data: UnsafePointer<UInt8>, length: Int) -> Int {
+        self.data.append(data, count: length)
+        return length
+    }
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapStreamingRedditArchiveDecoder.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapStreamingRedditArchiveDecoder.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d2b3d3e13855f342d9c2ead467b38066a08a3c85
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/MMapStreamingRedditArchiveDecoder.swift	
@@ -0,0 +1,155 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+import ZSTD
+
+
+/**
+         Implements a BufferedMemoryStream that can be used to pull out JSON objects on the fly as part of an Elva streaming decompression chain
+ */
+public class MMapStreamingRedditArchiveDecoder<T: Decodable> {
+    
+    // Same functionality as Elva's BufferedMemoryStream
+    public private(set) var representation: Data = Data()
+    private var readerIndex: Int = 0
+
+    // Our variables:
+    private var filter: (T) -> Bool                             /// Determine whether we should keep each element, defaults to keep everything
+    public var memoryStore: [T]?  = nil                         /// Where to put output if we want to keep it in memory
+    private var outputStream: WriteableStream?  = nil           /// Where to write output to if we just want to keep streaming it, e.g., possibly a `FileWriteStream`
+    public var errorStore: [Data] = [Data]()                    /// Save any errors so that we can fix decoding ... remove this later?
+    
+    // We'll use this to pull out objects
+    private let decoder = JSONDecoder()
+    
+    
+    /**
+                    Initalizer that immediately decompresses the stream
+     */
+    public init(from: ReadableStream,
+                to: WriteableStream,
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default)
+    throws {
+        outputStream = to
+        self.filter = filter
+        try ZSTD.decompress(reader: from, writer: self, config: config)
+    }
+    
+    /**
+                    Initalizer that immediately decompresses the stream and stores its items in an array
+     */
+    public init(from: ReadableStream,
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default
+    ) throws {
+        memoryStore = [T]()
+        self.filter = filter
+        try ZSTD.decompress(reader: from, writer: self, config: config)
+    }
+    
+    /**
+                    Initalizer that immediately decompresses the file and stores its items in an array
+     */
+    public init(path: String,
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default
+    ) throws {
+                
+        let i = try? MMapFileReadStream(path: path)
+        
+        guard let i = i else {
+            throw NSError(domain: "com.example.generic", code: 0, userInfo: [NSLocalizedDescriptionKey: "A generic error occurred."]) // TODO: Clean this up
+        }
+        
+        memoryStore = [T]()
+        self.filter = filter
+        try ZSTD.decompress(reader: i, writer: self, config: config)
+    }
+    
+    /**
+                    Initalizer that immediately decompresses the file and stores its items in an array
+     */
+    public init(inputPath: String,
+                outputPath: String,
+                filter: @escaping (T) -> Bool = { _ in true }
+    ) throws {
+                
+        let i = try? MMapFileReadStream(path: inputPath)
+        let o = try? MMapFileWriteStream(path: outputPath)
+        
+        guard let i = i, let o = o else {
+            throw NSError(domain: "com.example.generic", code: 0, userInfo: [NSLocalizedDescriptionKey: "A generic error occurred."]) // TODO: Clean this up
+        }
+                        
+        self.outputStream = o
+        self.filter = filter
+        
+        let config = ZSTD.DecompressConfig(parameters: [.windowLogMax(31)])
+        
+        try ZSTD.decompress(reader: i, writer: self, config: config)
+    }
+    
+}
+ 
+
+extension MMapStreamingRedditArchiveDecoder: WriteableStream {
+    public func write(_ data: UnsafePointer<UInt8>, length: Int) -> Int {
+        representation += Data(bytes: data, count: length)
+        
+        // While we have newlines, work through the file
+        while let range = representation.range(of: Data([0x0A])) {
+            let chunk = representation.subdata(in: 0..<range.lowerBound + 1)
+            representation.removeSubrange(0..<range.lowerBound + 1)
+                        
+            let item = try? decoder.decode(T.self, from: chunk)
+            
+            // If we found an error, save the data
+            if item == nil {
+                errorStore.insert(chunk, at: errorStore.count)
+            }
+            
+            if let item = item, filter(item) {
+                if memoryStore != nil {
+                    memoryStore!.insert(item, at: memoryStore!.count)
+                }
+                if let output = outputStream {
+                    let size = chunk.count
+                    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: size)
+                    chunk.copyBytes(to: buffer, count: size)
+                    let _ = output.write(buffer, length: size)
+                }
+            }
+        }
+                
+        return length
+    }
+    
+    public func close() {
+        if let out = outputStream {
+            out.close()
+        }
+    }
+    
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditCommentData.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditCommentData.swift
similarity index 100%
rename from Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditCommentData.swift
rename to Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditCommentData.swift
diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditContainer.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditContainer.swift
similarity index 100%
rename from Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditContainer.swift
rename to Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditContainer.swift
diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditSubmissionData.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditSubmissionData.swift
similarity index 100%
rename from Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditSubmissionData.swift
rename to Sources/SwiftNLP/1. Data Collection/PushShift Archives/RedditSubmissionData.swift
diff --git a/Sources/SwiftNLP/1. Data Collection/PushShift Archives/StreamingRedditArchiveDecoder.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/StreamingRedditArchiveDecoder.swift
new file mode 100644
index 0000000000000000000000000000000000000000..88f74fb547d7afb0484286a27af6181c429f4ad2
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/StreamingRedditArchiveDecoder.swift	
@@ -0,0 +1,132 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+import ZSTD
+
+
+/**
+         Implements a BufferedMemoryStream that can be used to pull out JSON objects on the fly as part of an Elva streaming decompression chain
+ */
+public class StreamingRedditArchiveDecoder<T: Decodable> {
+    
+    // Same functionality as Elva's BufferedMemoryStream
+    public private(set) var representation: Data = Data()
+    private var readerIndex: Int = 0
+
+    // Our variables:
+    private var filter: (T) -> Bool                             /// Determine whether we should keep each element, defaults to keep everything
+    public var memoryStore: [T]?  = nil                         /// Where to put output if we want to keep it in memory
+    private var outputStream: WriteableStream?  = nil           /// Where to write output to if we just want to keep streaming it, e.g., possibly a `FileWriteStream`
+    public var errorStore: [Data] = [Data]()                    /// Save any errors so that we can fix decoding ... remove this later?
+    
+    // We'll use this to pull out objects
+    private let decoder = JSONDecoder()
+    
+    
+    /**
+                    Initalizer that immediately decompresses the stream
+     */
+    public init(from: ReadableStream,
+                to: WriteableStream,
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default)
+    throws {
+        outputStream = to
+        self.filter = filter
+        try ZSTD.decompress(reader: from, writer: self, config: config)
+    }
+    
+    /**
+                    Initalizer that immediately decompresses the stream and stores its items in an array
+     */
+    public init(from: ReadableStream, 
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default
+    ) throws {
+        memoryStore = [T]()
+        self.filter = filter
+        try ZSTD.decompress(reader: from, writer: self, config: config)
+    }
+    
+    /**
+                    Initalizer that immediately decompresses the file and stores its items in an array
+     */
+    public init(path: String,
+                filter: @escaping (T) -> Bool = { _ in true },
+                config: ZSTD.DecompressConfig = ZSTD.DecompressConfig.default
+    ) throws {
+                
+        let i = try? FileReadStream(path: path)
+        
+        guard let i = i else {
+            throw NSError(domain: "com.example.generic", code: 0, userInfo: [NSLocalizedDescriptionKey: "A generic error occurred."]) // TODO: Clean this up
+        }
+        
+        memoryStore = [T]()
+        self.filter = filter
+        try ZSTD.decompress(reader: i, writer: self, config: config)
+    }
+    
+}
+ 
+
+extension StreamingRedditArchiveDecoder: WriteableStream {
+    public func write(_ data: UnsafePointer<UInt8>, length: Int) -> Int {
+        representation += Data(bytes: data, count: length)
+        
+        // While we have newlines, work through the file
+        while let range = representation.range(of: Data([0x0A])) {
+            let chunk = representation.subdata(in: 0..<range.lowerBound + 1)
+            representation.removeSubrange(0..<range.lowerBound + 1)
+                        
+            let item = try? decoder.decode(T.self, from: chunk)
+            
+            // If we found an error, save the data
+            if item == nil {
+                errorStore.insert(chunk, at: errorStore.count)
+            }
+            
+            if let item = item, filter(item) {
+                if memoryStore != nil {
+                    memoryStore!.insert(item, at: memoryStore!.count)
+                }
+                if let output = outputStream {
+                    let size = chunk.count
+                    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: size)
+                    chunk.copyBytes(to: buffer, count: size)
+                    let _ = output.write(buffer, length: size)
+                }
+            }
+        }
+                
+        return length
+    }
+    
+    public func close() {
+        if let out = outputStream {
+            out.close()
+        }
+    }
+    
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/loadFromRedditArchive.swift b/Sources/SwiftNLP/1. Data Collection/PushShift Archives/loadFromRedditArchive.swift
similarity index 100%
rename from Sources/SwiftNLP/1. Data Collection/Pushshift Archives/loadFromRedditArchive.swift
rename to Sources/SwiftNLP/1. Data Collection/PushShift Archives/loadFromRedditArchive.swift
diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/BufferedRedditArchiveDecoder.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/BufferedRedditArchiveDecoder.swift
deleted file mode 100644
index 41428c1d19f6ecf1d5e1723305cc8bfb8c2f6607..0000000000000000000000000000000000000000
--- a/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/BufferedRedditArchiveDecoder.swift	
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright (c) 2024 Jim Wallace
-//
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-//
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-import Foundation
-import ZSTD
-
-
-/**
-         Implements a BufferedMemoryStream that can be used to pull out JSON objects on the fly as part of an Elva streaming decompression chain
- */
-public class BufferedRedditArchiveDecoder<T: Decodable> {
-    
-    // Same functionality as Elva's BufferedMemoryStream
-    public private(set) var representation: Data = Data()
-    private var readerIndex: Int = 0
-
-    // Our variables:
-    private var filter: (T) -> Bool = { _ in true }                         /// Determine whether we should keep each element, defaults to keep everything
-    private var memoryStore: (any RangeReplaceableCollection<T>)?  = nil    /// Where to put output if we want to keep it in memory as `RedditDataItem` objects
-    private var outputStream: WriteableStream?  = nil                       /// Where to write output to if we just want to keep streaming it, e.g., possibly a `FileWriteStream`
-    private var remainingItems: UInt = 0                                    /// How many items are left from the index?
-    private var processedItems = 0
-    
-    // We'll use this to pull out objects
-    private let decoder = JSONDecoder()
-    
-    
-    // Constants
-    //private let leftBracket = Data([0x7B])
-    //private let rightBracket = Data([0x7D])
-        
-    public init(saveTo: WriteableStream, filter: @escaping (T) -> Bool = { _ in true }) {
-        outputStream = saveTo
-        self.filter = filter
-    }
-    
-    public init(storeIn: any RangeReplaceableCollection<T>, filter: @escaping (T) -> Bool = { _ in true }) {
-        memoryStore = storeIn
-        self.filter = filter
-    }
-}
- 
-
-extension BufferedRedditArchiveDecoder: WriteableStream {
-    public func write(_ data: UnsafePointer<UInt8>, length: Int) -> Int {
-        representation += Data(bytes: data, count: length)
-        
-        // While we have newlines, work through the file
-        while let range = representation.range(of: Data([0x0A])) {
-            let extractedChunk = representation.subdata(in: 0..<range.lowerBound + 1)
-            representation.removeSubrange(0..<range.lowerBound + 1)
-                                    
-            // TODO: The big archive files start with a [ String : Int32 ] that we need to parse conditionally ... need to test with those files
-            if processedItems == 0 {                    
-                // TODO: We probably have an extra { at the beginning and end for these files? How do we handle that gracefully?
-                let index = try? decoder.decode([String: Int32].self, from: extractedChunk)
-                
-                if let index = index {
-                    remainingItems = UInt(index.count)
-                }
-            }
-
-            let item = try? decoder.decode(T.self, from: extractedChunk)
-            
-            if let item = item, filter(item) {
-                if var memory = memoryStore {
-                    memory.append(item)
-                }
-                if let output = outputStream {
-                    let size = extractedChunk.count
-                    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: size)
-                    extractedChunk.copyBytes(to: buffer, count: size)
-                    let _ = output.write(buffer, length: size)
-                }
-            }
-            processedItems += 1
-        }
-                
-        return length
-    }
-    
-    public func close() {
-        if let out = outputStream {
-            out.close()
-        }
-        debugPrint("Processed \(processedItems) entries.")
-    }
-    
-}
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift
index 3a9408f988559cb59248a7e63ff021f5f0ee489b..2a6df1d9389c3fc984ea5681280c4a91f985b9dc 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift	
@@ -45,4 +45,77 @@ public struct Comment: RedditDataItem {
     public let subreddit: String?
     public let subreddit_id: String?
     public let replies: String?
+    
+    enum CodingKeys: String, CodingKey {
+        case author = "author"
+        case author_created_utc = "author_created_utc"
+        case author_flair_css_class = "author_flair_css_class"
+        case author_flair_text = "author_flair_text"
+        case author_fullname = "author_fullname"
+        case body = "body"
+        case controversiality = "controversiality"
+        case created_utc = "created_utc"
+        case distinguished = "distinguished"
+        case gilded = "gilded"
+        case id = "id"
+        case link_id = "link_id"
+        case nest_level = "nest_level"
+        case parent_id = "parent_id"
+        case reply_delay = "reply_delay"
+        case retrieved_on = "retrieved_on"
+        case score = "score"
+        case score_hidden = "score_hidden"
+        case subreddit = "subreddit"
+        case subreddit_id = "subreddit_id"
+        case replies = "replies"
+    }
 }
+
+
+extension Comment {
+    /**
+           Can we handle some of the messy data?
+     */
+    public init(from decoder: Decoder) throws {
+        
+        let container = try decoder.container(keyedBy: CodingKeys.self)
+        
+        author = try container.decodeIfPresent(String.self, forKey: .author)
+        author_created_utc = try container.decodeIfPresent(Int32.self, forKey: .author_created_utc)
+        author_flair_css_class = try container.decodeIfPresent(String.self, forKey: .author_flair_css_class)
+        author_flair_text = try container.decodeIfPresent(String.self, forKey: .author_flair_text)
+        author_fullname = try container.decodeIfPresent(String.self, forKey: .author_fullname)
+        body = try container.decodeIfPresent(String.self, forKey: .body)
+        controversiality = try container.decodeIfPresent(Int32.self, forKey: .controversiality)
+        distinguished = try container.decodeIfPresent(String.self, forKey: .distinguished)
+        gilded = try container.decodeIfPresent(Int32.self, forKey: .gilded)
+        id = try container.decodeIfPresent(String.self, forKey: .id)
+        link_id = try container.decodeIfPresent(String.self, forKey: .link_id)
+        nest_level = try container.decodeIfPresent(Int32.self, forKey: .nest_level)
+        parent_id = try container.decodeIfPresent(String.self, forKey: .parent_id)
+        reply_delay = try container.decodeIfPresent(Int32.self, forKey: .reply_delay)
+        retrieved_on = try container.decodeIfPresent(Int32.self, forKey: .retrieved_on)
+        score = try container.decodeIfPresent(Int32.self, forKey: .score)
+        score_hidden = try container.decodeIfPresent(Bool.self, forKey: .score_hidden)
+        subreddit = try container.decodeIfPresent(String.self, forKey: .subreddit)
+        subreddit_id = try container.decodeIfPresent(String.self, forKey: .subreddit_id)
+        replies = try container.decodeIfPresent(String.self, forKey: .replies)
+        
+        
+        // Custom decoding for 'created_utc'
+        //created_utc = try container.decodeIfPresent(Int32.self, forKey: .created_utc)
+        if let createdValue = try? container.decode(Int32.self, forKey: .created_utc) {
+            created_utc = createdValue
+        } else if let createdString = try? container.decode(String.self, forKey: .created_utc),
+                  let createdValue = Int32(createdString) {
+            created_utc = createdValue
+        } else {
+            throw DecodingError.typeMismatch(
+                Int32.self,
+                DecodingError.Context(codingPath: [CodingKeys.id], debugDescription: "Expected to decode Int32 or String for created_utc")
+            )
+        }
+        
+    }
+}
+
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift
index 259163eb577ec427b9648df68bcf5b6b1cfb4044..67381ce47aff7174e851ab2a9fd35469e89c3848 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift	
@@ -31,7 +31,7 @@ extension ListingDataItem {
             
         case .more:
             //debugPrint("FOUND MORE")
-            data = try container.decode(RessidtListingMore.self, forKey: .data)
+            data = try container.decode(RedditListingMore.self, forKey: .data)
             
         default:
             throw SessionError(message: "Unknown type of Reddit content from JSON.")
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift
index b8eb04b382b9d37210e84f19113ebf96f9051706..1611c7c56abf1584a37d4f60605073fb66cdd8da 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift	
@@ -21,7 +21,7 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
 
-struct RessidtListingMore: RedditDataItem {
+struct RedditListingMore: RedditDataItem {
         
     //let name: String?
     //let kind: String?
@@ -32,6 +32,7 @@ struct RessidtListingMore: RedditDataItem {
     
     var id: String? { return nil }
     var created_utc: Int32? { return nil } //  TODO: This is a hack that allows conformance to RedditDataItem ... fix later?
+    var subreddit: String? { return nil } //  TODO: This is a hack that allows conformance to RedditDataItem ... fix later?
 }
 
 struct MoreContainer: Codable {
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift
index 182162d2c9da1d8a1b13d438a5d18a02f02c51ad..e7b2f01f201ae5ebb24f6f4c5a1d568c14506a90 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift	
@@ -24,4 +24,6 @@
 public protocol RedditDataItem: Codable, Equatable {
     var created_utc: Int32? { get }
     var id: String? { get }
+    var subreddit: String? { get }
+    // TODO: What are the common fields, and how many can we expose through this protocol?
 }
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Subreddit.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Subreddit.swift
index f67f08402a6ee16ffff21e2fa7ac0e9d8c3e5664..7004192134618779bb56cb546f0e6873f7f86bef 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Subreddit.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Subreddit.swift	
@@ -23,7 +23,7 @@
 
 // TODO: Add conformance to RedditDataItem
 public struct Subreddit: RedditDataItem {
-    
+        
     public var id: String?
     public let submitText: String?
     public let displayName: String?
@@ -44,6 +44,8 @@ public struct Subreddit: RedditDataItem {
     public let publicDescription: String?
     
     public var created_utc: Int32? { Int32(createdUTC) }
+    public var subreddit: String? { displayName }
+
     
     
     
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift
index b6f27c9063ffcb4b095a3fb978c248974065aab0..b52e88d19020e69b40427148769bac6785b4a5d9 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift	
@@ -77,7 +77,7 @@ extension Session {
                 comments.append(child.data as! Comment)
             }
             if child.kind == .more {
-                let moreItems = child.data as! RessidtListingMore
+                let moreItems = child.data as! RedditListingMore
                 more.append(contentsOf: moreItems.children)
             }
         }
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift
index 045e643a5b838160f20319b9e0484ecc9d8a9b58..a0244507e13032777856cc18e380f8bd84d75478 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift	
@@ -119,7 +119,7 @@ extension Session {
                 comments.append(child.data as! Comment)
             }
             if child.kind == .more {
-                let moreItems = child.data as! RessidtListingMore
+                let moreItems = child.data as! RedditListingMore
                 more.append(contentsOf: moreItems.children)
             }
         }
diff --git a/Tests/SwiftNLPTests/1. Data Collection/CWorld.AI/Load 20k Tests.swift b/Tests/SwiftNLPTests/1. Data Collection/CWorld.AI/Load 20k Tests.swift
index d33ccfb74d5fc510ec46483f9113cd0a92925461..4a37465f348cf5fa4f021be7552fdde88bc0bf9c 100644
--- a/Tests/SwiftNLPTests/1. Data Collection/CWorld.AI/Load 20k Tests.swift	
+++ b/Tests/SwiftNLPTests/1. Data Collection/CWorld.AI/Load 20k Tests.swift	
@@ -1,29 +1,46 @@
+// Copyright (c) 2024 Jim Wallace
 //
-//  File.swift
-//  
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
 //
-//  Created by Jason Zhao on 2023-05-18.
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
 //
-import XCTest
-@testable import SwiftNLP
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
 
-final class SwiftNLPLoadDataTests: XCTestCase {
-            
-    func test20kDownload() async throws {
-        
-        let result = try await downloadSubredditFromServer(subreddit: "StopGaming")
-        //print("Loaded \(result.count) threads from server.")
-        //if let random = result.randomElement() {
-        //    let (key, value) = random
-            //print("Key: \(key), Value: \(value)")
-       // }
-        XCTAssertEqual(result.count, 38624, "Failed to load subreddit data from https://reddit-top20k.cworld.ai")
-        
-        var total = 0
-        for thread in result {
-            total += thread.value.comments.count
-            //print("Comments: \(thread.value.comments.count)")
-        }
-        XCTAssertEqual(total, 255294, "Failed to load comment data from https://reddit-top20k.cworld.ai")
-    }
-}
+//import XCTest
+//@testable import SwiftNLP
+//
+//final class SwiftNLPLoadDataTests: XCTestCase {
+//            
+//    func test20kDownload() async throws {
+//        
+//        let result = try await downloadSubredditFromServer(subreddit: "StopGaming")
+//        //print("Loaded \(result.count) threads from server.")
+//        //if let random = result.randomElement() {
+//        //    let (key, value) = random
+//            //print("Key: \(key), Value: \(value)")
+//       // }
+//        XCTAssertEqual(result.count, 38624, "Failed to load subreddit data from https://reddit-top20k.cworld.ai")
+//        
+//        var total = 0
+//        for thread in result {
+//            total += thread.value.comments.count
+//            //print("Comments: \(thread.value.comments.count)")
+//        }
+//        XCTAssertEqual(total, 255294, "Failed to load comment data from https://reddit-top20k.cworld.ai")
+//    }
+//}
diff --git a/Tests/SwiftNLPTests/1. Data Collection/PushShift Archives/PushShift Tests.swift b/Tests/SwiftNLPTests/1. Data Collection/PushShift Archives/PushShift Tests.swift
new file mode 100644
index 0000000000000000000000000000000000000000..096734314b9a1d2458b0e48f2ca9aa2fd39f7861
--- /dev/null
+++ b/Tests/SwiftNLPTests/1. Data Collection/PushShift Archives/PushShift Tests.swift	
@@ -0,0 +1,129 @@
+// Copyright (c) 2024 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import XCTest
+@testable import SwiftNLP
+import ZSTD
+
+final class PushShiftStreamingTests: XCTestCase {
+            
+    
+    func testNoIndex() async throws {
+        
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        
+        let input = try FileReadStream(path: submissionsURL.path)
+        let stream = try StreamingRedditArchiveDecoder<Submission>(from: input)
+        
+        XCTAssert(stream.memoryStore!.count == 17999)
+        XCTAssert(stream.errorStore.count == 0)
+    }
+    
+    func testNoIndexBig() async throws {
+        
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        guard let commentsURL = Bundle.module.url(forResource: "Guelph_comments", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        
+        let sInput = try FileReadStream(path: submissionsURL.path)
+        let sStream = try StreamingRedditArchiveDecoder<Submission>(from: sInput)
+
+        let cInput = try FileReadStream(path: commentsURL.path)
+        let cStream = try StreamingRedditArchiveDecoder<Comment>(from: cInput)
+                
+        XCTAssert(sStream.memoryStore!.count == 17999)
+        XCTAssert(sStream.errorStore.count == 0)
+        XCTAssert(cStream.memoryStore!.count == 160104)
+        XCTAssert(cStream.errorStore.count == 0)
+    }
+    
+    func testIndex() async throws {
+
+        guard let submissionsURL = Bundle.module.url(forResource: "RS_2006-01", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        
+        
+        let input = try FileReadStream(path: submissionsURL.path)
+        let config = ZSTD.DecompressConfig(parameters: [.windowLogMax(31)])                         // These files require a bigger window size
+        let stream = try StreamingRedditArchiveDecoder<Submission>(from: input, config: config)
+
+        XCTAssert(stream.memoryStore!.count == 8048)
+        XCTAssert(stream.errorStore.count == 0)
+
+    }
+    
+    func testIndexBig() async throws {
+        
+        guard let submissionsURL = Bundle.module.url(forResource: "RS_2006-01", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        guard let commentsURL = Bundle.module.url(forResource: "RC_2006-01", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        
+        
+        let sInput = try FileReadStream(path: submissionsURL.path)
+        let config = ZSTD.DecompressConfig(parameters: [.windowLogMax(31)])                         // These files require a bigger window size
+        let sStream = try StreamingRedditArchiveDecoder<Submission>(from: sInput, config: config)
+
+        
+        let cInput = try FileReadStream(path: commentsURL.path)
+        let cStream = try StreamingRedditArchiveDecoder<Comment>(from: cInput, config: config)
+                        
+        XCTAssert(sStream.memoryStore!.count == 8048)
+        XCTAssert(sStream.errorStore.count == 0)
+        XCTAssert(cStream.memoryStore!.count == 3666)
+        XCTAssert(cStream.errorStore.count == 0)
+    }
+    
+    func testFilter() async throws {
+        
+        guard let submissionsURL = Bundle.module.url(forResource: "Guelph_submissions", withExtension: "zst") else {
+            fatalError("Failed to find waterloo_submissions.zst in test bundle.")
+        }
+        
+        
+        let input = try FileReadStream(path: submissionsURL.path)
+        
+        let filter: (Submission) -> Bool = { value in
+            if let selftext = value.selftext {
+                if selftext.contains("university") {
+                    return true
+                }
+            }
+            return false
+        }
+
+        let stream = try StreamingRedditArchiveDecoder(from: input, filter: filter)
+        
+        XCTAssert(stream.memoryStore!.count == 139)
+        XCTAssert(stream.errorStore.count == 0)
+    }
+
+}
diff --git a/Tests/SwiftNLPTests/Resources/RC_2006-01.zst b/Tests/SwiftNLPTests/Resources/RC_2006-01.zst
new file mode 100644
index 0000000000000000000000000000000000000000..fd0a54464b688cb7b09454284695ee749bc9ac28
Binary files /dev/null and b/Tests/SwiftNLPTests/Resources/RC_2006-01.zst differ
diff --git a/Tests/SwiftNLPTests/Resources/RS_2006-01.zst b/Tests/SwiftNLPTests/Resources/RS_2006-01.zst
new file mode 100644
index 0000000000000000000000000000000000000000..c87fdea601108b10b3a642e0055679de77572629
Binary files /dev/null and b/Tests/SwiftNLPTests/Resources/RS_2006-01.zst differ
diff --git a/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift b/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift
deleted file mode 100644
index 770d52de2b72400a0fde878c1ef3454781109fef..0000000000000000000000000000000000000000
--- a/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift
+++ /dev/null
@@ -1,95 +0,0 @@
-//
-//  LoadTestJson.swift
-//  
-//
-//  Created by Jason Zhao on 2023-05-18.
-//
-
-import Foundation
-import SwiftNLP
-
-// the goal of this util is to fetch and load json files into the Data class
-// which can then be used directly by the package to convert into objects
-// right now it takes test json from the Resources folder in Test
-// however, eventually we would like to fetch the data from a cloud server
-// With modularity, we can simply change the logic of this Util to grab from cloud, and return the same data structure
-class TestUtils {
-    // loads a json data given file name
-    static func loadJsonData(file: String) -> Data {
-        let bundle = Bundle.module
-        guard let url = bundle.url(forResource: file, withExtension: "json") else {
-            fatalError("Failed to find \(file).json in test bundle.")
-        }
-        guard let data = try? Data(contentsOf: url) else {
-            fatalError("Failed to load \(file).json from test bundle.")
-        }
-        return data
-    }
-    
-    // returns all json files with the specific prefix
-    // right now we distinguish with "RC" and "RS"
-    static func getJsonFiles(prefix: String) -> [String] {
-        let fileManager = FileManager.default
-        guard let resourcePath = Bundle.module.resourcePath else {
-            print("Failed to find resource path")
-            return []
-        }
-
-        do {
-            let files = try fileManager.contentsOfDirectory(atPath: resourcePath)
-            let jsonFiles = files.filter{$0.hasPrefix(prefix) && $0.hasSuffix(".json")}
-            
-            var fileNames: [String] = []
-
-            for jsonFile in jsonFiles {
-                let fileName = (jsonFile as NSString).deletingPathExtension
-                fileNames.append(fileName)
-            }
-            return fileNames
-        } catch let error as NSError {
-            print("Error while getting files : \(error)")
-            return []
-        }
-    }
-    
-    // load all the json files with the prefix
-    static func loadAllFiles(prefix: String) -> [Data] {
-        let allJsonFiles = TestUtils.getJsonFiles(prefix: prefix)
-        var result: [Data] = []
-        for fileName in allJsonFiles {
-            let json = TestUtils.loadJsonData(file: fileName)
-            result.append(json)
-        }
-        return result
-    }
-    
-    // loads all reddit comments files into json Data
-    static func loadAllRedditComment() -> [Data] {
-        return loadAllFiles(prefix: "RC")
-    }
-    
-    // loads all reddit submissions files into json Data
-    static func loadAllRedditSubmission() -> [Data] {
-        return loadAllFiles(prefix: "RS")
-    }
-    
-    static func readRedditCommentJson(json: Data) -> RedditCommentData? {
-        do {
-            let commentData = try JSONDecoder().decode(RedditCommentData.self, from: json)
-            return commentData
-        } catch {
-            print("Error while decoding reddit comment file: \(error)")
-            return nil
-        }
-    }
-    
-    static func readRedditSubmissionJson(json: Data) -> RedditSubmissionData? {
-        do {
-            let submissionData = try JSONDecoder().decode(RedditSubmissionData.self, from: json)
-            return submissionData
-        } catch {
-            print("Error while decoding reddit submission file: \(error)")
-            return nil
-        }
-    }
-}