From a26bba28550ef850e55cf735e97d2cf717614dc6 Mon Sep 17 00:00:00 2001 From: Jim Wallace <james.wallace@uwaterloo.ca> Date: Wed, 20 Dec 2023 12:45:15 -0500 Subject: [PATCH] Added basic function to fetch all comments from a submission, returning a thread. --- .../Reddit API/Data Types/MoreComments.swift | 13 ++++ .../Reddit API/Data Types/Submission.swift | 2 + .../Session + Comment Search.swift | 12 ++-- .../Network Endpoints/Session + Info.swift | 31 +++++++++ .../Session + MoreChildren.swift | 25 +++++++- .../Reddit API/Session.swift | 64 ++++++++++++++++--- .../Reddit API/Session Method Tests.swift | 53 +++++++++++++++ ...t => Session Network Endpoint Tests.swift} | 53 +++++++++------ 8 files changed, 215 insertions(+), 38 deletions(-) create mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift create mode 100644 Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift rename Tests/SwiftNLPTests/Reddit API/{Session Tests.swift => Session Network Endpoint Tests.swift} (81%) diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift index 7c6000c3..b8eb04b3 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift @@ -33,3 +33,16 @@ struct RessidtListingMore: RedditDataItem { var id: String? { return nil } var created_utc: Int32? { return nil } // TODO: This is a hack that allows conformance to RedditDataItem ... fix later? } + +struct MoreContainer: Codable { + var json: InnerMoreContainer +} + +struct InnerMoreContainer: Codable { + let errors: [String] + let data: InnerInnerMoreContainer +} + +struct InnerInnerMoreContainer: Codable { + let things: [ListingDataItem] +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift index f6804fcd..614f712e 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift @@ -43,6 +43,8 @@ public struct Submission: RedditDataItem { public let thumbnail: String? public let title: String? public let url: String? + + public var linkID: String { "\(RedditContentType.link)_\(id!)"} } extension Submission { diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift index 37bcd894..1cbd9c66 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift @@ -27,7 +27,9 @@ extension Session { /// Returns a comment tree corresponding to a search of the r/subreddit/comments/article endpoint func searchComment( - submission: Submission, + subreddit: String, + articleID: String, + //submission: Submission, comment: String? = nil, context: UInt = 0, depth: Int? = nil, @@ -43,10 +45,10 @@ extension Session { truncate: UInt = 0 ) async throws -> [Listing] { - guard let subreddit = submission.subreddit, let articleID = submission.id - else { - throw SessionError(message: "Submission must include article data.") - } +// guard let subreddit = submission.subreddit, let articleID = submission.id +// else { +// throw SessionError(message: "Submission must include article data.") +// } var parameters: [String : String] = [String:String]() diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift new file mode 100644 index 00000000..6315738a --- /dev/null +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Info.swift @@ -0,0 +1,31 @@ +// Copyright (c) 2023 Jim Wallace +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +import Foundation + +extension Session { + + // TODO: Add caller for the /api/info endpoint + // This should give information about an arbitrary data point, useful, but is an additional call in many cases and so should be avoided + +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift index dbbc8070..2245850d 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift @@ -29,11 +29,11 @@ extension Session { func moreChildren( linkID: String, children: [String], - id: String?, + id: String? = nil, depth: Int? = nil, limitChildren: Bool = false, sort: ListingSortOrder = .new - ) async throws -> [Listing] { + ) async throws -> ([Comment],[String]) { var parameters: [String : String] = [String:String]() @@ -59,10 +59,29 @@ extension Session { ) do { - return try JSONDecoder().decode([Listing].self, from: data) + let container = try JSONDecoder().decode(MoreContainer.self, from: data) + return _processContainer(container) + } catch { throw SessionError(message: "Unable to decode server response.") } } + func _processContainer(_ c: MoreContainer) -> ([Comment], [String]) { + var comments: [Comment] = [Comment]() + var more: [String] = [String]() + + for child in c.json.data.things { + if child.kind == .comment { + comments.append(child.data as! Comment) + } + if child.kind == .more { + let moreItems = child.data as! RessidtListingMore + more.append(contentsOf: moreItems.children) + } + } + return (comments,more) + } + + } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift index 0dee111a..9621f9b1 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift @@ -63,13 +63,57 @@ class Session { } -//extension Session { -// -// func fetchThread(submissionID: String) -> RedditThread? { -// -// -// -// return nil -// } -// -//} +extension Session { + + func fetchThread(subreddit: String, articleID: String) async throws -> RedditThread { + + // TODO: What are the optimal parameters for this call? + let listings = try await searchComment(subreddit: subreddit, articleID: articleID, showMore: true) + + // First listing should contain our submission data + guard listings[0].data.children[0].kind == .link else { + throw SessionError(message: "Error, did not find initial post for this subreddit and articleID") + } + let submission = listings[0].data.children[0].data as! Submission + + // Iteratively unwrap the MoreComments results, until we're all done + var (comments, more) = _processListingIntoCommentsAndMore(listings[1]) + + // While we have more comments to fetch, keep making calls to the MoreChildren endpoint + while !more.isEmpty { + + var toFetch: [String] = [String]() + + if more.count > 100 { + toFetch = Array(more[0 ..< 100]) + more = Array(more[100 ..< more.count ]) + } else { + toFetch = more + more = [String]() + } + + let (c2, m2) = try await moreChildren(linkID: submission.linkID, children: toFetch) + + // Add comments to our list of comments, add more items to our list of more items + comments.append(contentsOf: c2) + more.append(contentsOf: m2) + } + return RedditThread(submission: submission, comments: comments) + } + + func _processListingIntoCommentsAndMore(_ l: Listing) -> ([Comment], [String]) { + var comments: [Comment] = [Comment]() + var more: [String] = [String]() + for child in l.data.children { + if child.kind == .comment { + comments.append(child.data as! Comment) + } + if child.kind == .more { + let moreItems = child.data as! RessidtListingMore + more.append(contentsOf: moreItems.children) + } + } + return (comments,more) + } + +} diff --git a/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift b/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift new file mode 100644 index 00000000..199045e1 --- /dev/null +++ b/Tests/SwiftNLPTests/Reddit API/Session Method Tests.swift @@ -0,0 +1,53 @@ +// Copyright (c) 2023 Jim Wallace +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +import Foundation +import XCTest +@testable import SwiftNLP + +final class RedditSessionMethodTest: XCTestCase { + + + func testFetchComments() async throws { + let id = ProcessInfo.processInfo.environment["REDDIT_CLIENT_ID"] ?? nil + let secret = ProcessInfo.processInfo.environment["REDDIT_CLIENT_SECRET"] ?? nil + + guard let id = id, let secret = secret else { + fatalError("Unable to fetch REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET from ProcessInfo.") + } + + let client = Session(id: id, secret: secret) + guard let _ = try? await client.authenticate() else { + throw SessionError(message: "Error authenticating client.") + } + + // "7dljcy", subreddit: "AskReddit") + // https://www.reddit.com/r/redditdev/comments/7dohn2/why_does_the_following_api_endpoint_return/ + let result: RedditThread = try await client.fetchThread(subreddit: "AskReddit", articleID: "7dljcy") // TODO: We aren't loading all 2610 comments... some deleted, but can we tune the method to get more? + //print("Loaded thread with \(result.comments.count) comments") + //print(result.submission) + //print(result.comments[100]) + + XCTAssert(result.submission.num_comments == 2610 && result.submission.subreddit == "AskReddit") + } +} diff --git a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift b/Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift similarity index 81% rename from Tests/SwiftNLPTests/Reddit API/Session Tests.swift rename to Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift index 2c30427b..38ddf0ea 100644 --- a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift +++ b/Tests/SwiftNLPTests/Reddit API/Session Network Endpoint Tests.swift @@ -1,18 +1,31 @@ +// Copyright (c) 2023 Jim Wallace // -// File.swift +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: // +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. // -// Created by Nicole Mathis on 2023-07-04. -// - - -// When increasing the number of trees, the output is significantly more accurate however, the time to run becomes much longer +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. import Foundation import XCTest @testable import SwiftNLP -final class RedditSessionTest: XCTestCase { +final class RedditSessionEndpointTest: XCTestCase { func testHasRedditCredentials() throws { @@ -125,10 +138,10 @@ final class RedditSessionTest: XCTestCase { } // https://www.reddit.com/r/uwaterloo/comments/18lbokl/conestoga_college_finally_being_called_out_by_the/ - let submission = Submission(id: "18lbokl", subreddit: "uwaterloo") + //let submission = Submission(id: "18lbokl", subreddit: "uwaterloo") // This should return an array of listings, one with original submisison and one with responses. - let result = try await client.searchComment(submission: submission) + let result = try await client.searchComment(subreddit: "uwaterloo", articleID: "18lbokl") XCTAssert(result.count > 0) @@ -151,20 +164,20 @@ final class RedditSessionTest: XCTestCase { // 1) https://www.reddit.com/r/AmItheAsshole/comments/18m3xgr/aita_for_refusing_to_attend_my_inlaws_christmas/ // 2) https://www.reddit.com/r/AskReddit/comments/7dljcy/serious_what_can_the_average_joe_do_to_save_net/.json - let submission = Submission(id: "7dljcy", subreddit: "AskReddit") + //let submission = Submission(id: "7dljcy", subreddit: "AskReddit") // This should return an array of listings, one with original submisison and one with responses. // It's a big thread, so we *should* also get a `more` entry - let result = try await client.searchComment(submission: submission, showMore: true) - - for (i, listing) in result.enumerated() { - print("Listing \(i): \(listing.data.children.count) elements") - for item in listing.data.children { - if item.kind == .more { - print(item) - } - } - } + let result = try await client.searchComment(subreddit: "AskReddit", articleID: "7dljcy", showMore: true) + +// for (i, listing) in result.enumerated() { +// print("Listing \(i): \(listing.data.children.count) elements") +// for item in listing.data.children { +// if item.kind == .more { +// print(item) +// } +// } +// } XCTAssert(result.count > 0) //XCTAssert(result[1].children.count > 0) -- GitLab