From a26bba28550ef850e55cf735e97d2cf717614dc6 Mon Sep 17 00:00:00 2001
From: Jim Wallace <james.wallace@uwaterloo.ca>
Date: Wed, 20 Dec 2023 12:45:15 -0500
Subject: [PATCH] Added basic function to fetch all comments from a submission,
 returning a thread.

---
 .../Reddit API/Data Types/MoreComments.swift  | 13 ++++
 .../Reddit API/Data Types/Submission.swift    |  2 +
 .../Session + Comment Search.swift            | 12 ++--
 .../Network Endpoints/Session + Info.swift    | 31 +++++++++
 .../Session + MoreChildren.swift              | 25 +++++++-
 .../Reddit API/Session.swift                  | 64 ++++++++++++++++---
 .../Reddit API/Session Method Tests.swift     | 53 +++++++++++++++
 ...t => Session Network Endpoint Tests.swift} | 53 +++++++++------
 8 files changed, 215 insertions(+), 38 deletions(-)
 create mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift
 create mode 100644 Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift
 rename Tests/SwiftNLPTests/Reddit API/{Session Tests.swift => Session Network Endpoint Tests.swift} (81%)

diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift
index 7c6000c3..b8eb04b3 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift	
@@ -33,3 +33,16 @@ struct RessidtListingMore: RedditDataItem {
     var id: String? { return nil }
     var created_utc: Int32? { return nil } //  TODO: This is a hack that allows conformance to RedditDataItem ... fix later?
 }
+
+struct MoreContainer: Codable { // Outermost object decoded from the moreChildren response
+    var json: InnerMoreContainer // Decoded from the "json" key
+}
+
+struct InnerMoreContainer: Codable { // Value stored under the "json" key
+    let errors: [String] // NOTE(review): decoding fails unless every entry is a plain string — confirm against the API
+    let data: InnerInnerMoreContainer
+}
+
+struct InnerInnerMoreContainer: Codable { // Value stored under the "data" key
+    let things: [ListingDataItem] // Items that _processContainer sorts into comments and `more` IDs
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift
index f6804fcd..614f712e 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift	
@@ -43,6 +43,8 @@ public struct Submission: RedditDataItem {
     public let thumbnail: String?
     public let title: String?
     public let url: String?
+    
+    public var linkID: String { "\(RedditContentType.link)_\(id!)"} // NOTE(review): force-unwraps `id`; traps when `id` is nil
 }
 
 extension Submission {
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift
index 37bcd894..1cbd9c66 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift	
@@ -27,7 +27,9 @@ extension Session {
     
     /// Returns a comment tree corresponding to a search of the r/subreddit/comments/article endpoint
     func searchComment(
-        submission: Submission,
+        subreddit: String,
+        articleID: String,
+        //submission: Submission,
         comment: String? = nil,
         context: UInt = 0,
         depth: Int? = nil,
@@ -43,10 +45,10 @@ extension Session {
         truncate: UInt = 0        
     ) async throws -> [Listing] {
                
-        guard let subreddit = submission.subreddit, let articleID = submission.id
-        else {
-            throw SessionError(message: "Submission must include article data.")
-        }
+//        guard let subreddit = submission.subreddit, let articleID = submission.id
+//        else {
+//            throw SessionError(message: "Submission must include article data.")
+//        }
         
         var parameters: [String : String] = [String:String]()
         
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift
new file mode 100644
index 00000000..6315738a
--- /dev/null
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift	
@@ -0,0 +1,31 @@
+// Copyright (c) 2023 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+
+extension Session {
+ 
+    // TODO: Add a caller for the /api/info endpoint.
+    // It should return information about an arbitrary data item, which is useful, but in many cases it costs an additional network call and so should be avoided where possible.
+    
+}
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift
index dbbc8070..2245850d 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift	
@@ -29,11 +29,11 @@ extension Session {
     func moreChildren(
         linkID: String,
         children: [String],
-        id: String?,
+        id: String? = nil,
         depth: Int? = nil,
         limitChildren: Bool = false,
         sort: ListingSortOrder = .new
-    ) async throws -> [Listing] {
+    ) async throws -> ([Comment],[String]) {
                
         
         var parameters: [String : String] = [String:String]()
@@ -59,10 +59,29 @@ extension Session {
         )
         
         do {
-            return try JSONDecoder().decode([Listing].self, from: data)
+            let container = try JSONDecoder().decode(MoreContainer.self, from: data)
+            return _processContainer(container)
+            
         } catch {
             throw SessionError(message: "Unable to decode server response.")
         }
     }
     
+    /// Splits the decoded `things` into comments and the child IDs listed by any `more` items.
+    func _processContainer(_ c: MoreContainer) -> ([Comment], [String]) {
+        var comments: [Comment] = []
+        var more: [String] = []
+        for child in c.json.data.things {
+            // Conditional casts: a payload that does not match its `kind` is skipped
+            // instead of crashing the process through a forced cast.
+            if child.kind == .comment, let comment = child.data as? Comment {
+                comments.append(comment)
+            } else if child.kind == .more, let moreItems = child.data as? RessidtListingMore {
+                more.append(contentsOf: moreItems.children)
+            }
+        }
+        return (comments, more)
+    }
+    
+
 }
diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift
index 0dee111a..9621f9b1 100644
--- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift	
@@ -63,13 +63,57 @@ class Session {
 
 }
 
-//extension Session {
-//    
-//    func fetchThread(submissionID: String) -> RedditThread? {
-//        
-//        
-//        
-//        return nil
-//    }
-//    
-//}
+extension Session {
+    
+    /// Fetches a submission and its comments, returning them together as a `RedditThread`.
+    ///
+    /// Calls `searchComment` once, then calls `moreChildren` with at most 100 IDs per request
+    /// until no outstanding `more` IDs remain.
+    ///
+    /// - Parameter subreddit: Forwarded to `searchComment`.
+    /// - Parameter articleID: Forwarded to `searchComment`.
+    /// - Returns: A `RedditThread` built from the submission and every comment collected.
+    /// - Throws: `SessionError` if the response lacks a leading submission or a comment listing; errors from `searchComment` and `moreChildren` propagate.
+    func fetchThread(subreddit: String, articleID: String) async throws -> RedditThread {
+        // TODO: What are the optimal parameters for this call?
+        let listings = try await searchComment(subreddit: subreddit, articleID: articleID, showMore: true)
+        
+        // Check the response shape first; indexing and force-casting would trap on a short or unexpected reply.
+        guard listings.count > 1,
+              let post = listings[0].data.children.first,
+              post.kind == .link,
+              let submission = post.data as? Submission
+        else {
+            throw SessionError(message: "Error, did not find initial post for this subreddit and articleID")
+        }
+        var (comments, more) = _processListingIntoCommentsAndMore(listings[1])
+        
+        // While we have more comments to fetch, keep making calls to the MoreChildren endpoint
+        while !more.isEmpty {
+            // Send at most 100 IDs per request and drop them from the pending list.
+            let toFetch = Array(more.prefix(100))
+            more.removeFirst(toFetch.count)
+            
+            let (c2, m2) = try await moreChildren(linkID: submission.linkID, children: toFetch)
+            comments.append(contentsOf: c2)
+            more.append(contentsOf: m2)
+        }
+        return RedditThread(submission: submission, comments: comments)
+    }
+    
+    /// Splits a listing's children into comments and the child IDs listed by any `more` items.
+    func _processListingIntoCommentsAndMore(_ l: Listing) -> ([Comment], [String]) {
+        var comments: [Comment] = []
+        var more: [String] = []
+        for child in l.data.children {
+            // Conditional casts: a payload that does not match its `kind` is skipped, not trapped on.
+            if child.kind == .comment, let comment = child.data as? Comment {
+                comments.append(comment)
+            } else if child.kind == .more, let moreItems = child.data as? RessidtListingMore {
+                more.append(contentsOf: moreItems.children)
+            }
+        }
+        return (comments, more)
+    }
+    
+}
diff --git a/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift b/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift
new file mode 100644
index 00000000..199045e1
--- /dev/null
+++ b/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift	
@@ -0,0 +1,53 @@
+// Copyright (c) 2023 Jim Wallace
+//
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+import Foundation
+import XCTest
+@testable import SwiftNLP
+
+final class RedditSessionMethodTest: XCTestCase {
+    
+    /// Fetches a full AskReddit thread over the live API and checks the returned submission.
+    /// Requires REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET in the environment; skipped otherwise.
+    func testFetchComments() async throws {
+        let environment = ProcessInfo.processInfo.environment
+        guard let id = environment["REDDIT_CLIENT_ID"], let secret = environment["REDDIT_CLIENT_SECRET"] else {
+            // Skip instead of calling fatalError, which would abort the entire test run.
+            throw XCTSkip("Unable to fetch REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET from ProcessInfo.")
+        }
+        
+        let client = Session(id: id, secret: secret)
+        guard let _ = try? await client.authenticate() else {
+            throw SessionError(message: "Error authenticating client.")
+        }
+        
+        // "7dljcy", subreddit: "AskReddit")
+        // https://www.reddit.com/r/redditdev/comments/7dohn2/why_does_the_following_api_endpoint_return/
+        // TODO: We aren't loading all 2610 comments... some deleted, but can we tune the method to get more?
+        let result: RedditThread = try await client.fetchThread(subreddit: "AskReddit", articleID: "7dljcy")
+        
+        // Assert each field separately so a failure names the value that differed.
+        XCTAssertEqual(result.submission.num_comments, 2610)
+        XCTAssertEqual(result.submission.subreddit, "AskReddit")
+    }
+}
diff --git a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift b/Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift
similarity index 81%
rename from Tests/SwiftNLPTests/Reddit API/Session Tests.swift
rename to Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift
index 2c30427b..38ddf0ea 100644
--- a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift	
+++ b/Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift	
@@ -1,18 +1,31 @@
+// Copyright (c) 2023 Jim Wallace
 //
-//  File.swift
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
 //
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
 //
-//  Created by Nicole Mathis on 2023-07-04.
-//
-
-
-// When increasing the number of trees, the output is significantly more accurate however, the time to run becomes much longer
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
 
 import Foundation
 import XCTest
 @testable import SwiftNLP
 
-final class RedditSessionTest: XCTestCase {
+final class RedditSessionEndpointTest: XCTestCase {
     
     
     func testHasRedditCredentials() throws {
@@ -125,10 +138,10 @@ final class RedditSessionTest: XCTestCase {
         }
         
         // https://www.reddit.com/r/uwaterloo/comments/18lbokl/conestoga_college_finally_being_called_out_by_the/
-        let submission = Submission(id: "18lbokl", subreddit: "uwaterloo")
+        //let submission = Submission(id: "18lbokl", subreddit: "uwaterloo")
         
         // This should return an array of listings, one with original submisison and one with responses.
-        let result = try await client.searchComment(submission: submission)
+        let result = try await client.searchComment(subreddit: "uwaterloo", articleID: "18lbokl")
         
         
         XCTAssert(result.count > 0)
@@ -151,20 +164,20 @@ final class RedditSessionTest: XCTestCase {
         
         // 1) https://www.reddit.com/r/AmItheAsshole/comments/18m3xgr/aita_for_refusing_to_attend_my_inlaws_christmas/
         // 2) https://www.reddit.com/r/AskReddit/comments/7dljcy/serious_what_can_the_average_joe_do_to_save_net/.json
-        let submission = Submission(id: "7dljcy", subreddit: "AskReddit")
+        //let submission = Submission(id: "7dljcy", subreddit: "AskReddit")
         
         // This should return an array of listings, one with original submisison and one with responses.
         // It's a big thread, so we *should* also get a `more` entry
-        let result = try await client.searchComment(submission: submission, showMore: true)
-        
-        for (i, listing) in result.enumerated() {
-            print("Listing \(i): \(listing.data.children.count) elements")
-            for item in listing.data.children {
-                if item.kind == .more {
-                    print(item)
-                }
-            }
-        }
+        let result = try await client.searchComment(subreddit: "AskReddit", articleID: "7dljcy", showMore: true)
+        
+//        for (i, listing) in result.enumerated() {
+//            print("Listing \(i): \(listing.data.children.count) elements")
+//            for item in listing.data.children {
+//                if item.kind == .more {
+//                    print(item)
+//                }
+//            }
+//        }
                 
         XCTAssert(result.count > 0)
         //XCTAssert(result[1].children.count > 0)
-- 
GitLab