From a1f73576d539a4764146394efc9fea2a22c5883e Mon Sep 17 00:00:00 2001 From: Jim Wallace <james.wallace@uwaterloo.ca> Date: Tue, 19 Dec 2023 15:33:13 -0500 Subject: [PATCH] Refactoring Session, Added MoreComments stubs --- .../Top 20k Subreddit Download.swift | 18 ++-- .../Authentication/AuthResponse.swift | 0 .../Session + Authentication.swift | 0 .../Data Types/AnyDecodableValue.swift | 0 .../Data Types/Comment.swift} | 7 +- .../Data Types/Listing + Codable.swift | 58 ++++++++++++ .../Reddit API/Data Types/Listing.swift | 43 +++++++++ .../Reddit API/Data Types/MoreComments.swift | 35 ++++++++ .../Data Types/RedditCommentData.swift | 6 +- .../Data Types/RedditContainer.swift | 0 .../Data Types/RedditDataItem.swift | 0 .../Data Types/RedditSubmissionData.swift | 6 +- .../Data Types/RedditThread.swift | 6 +- .../Data Types/Submission.swift} | 4 +- .../{Reddit => Reddit API}/Enumerations.swift | 2 + .../Session + Comment Search.swift} | 14 +-- .../Session + MoreChildren.swift | 68 ++++++++++++++ .../Session + Subreddit Search.swift} | 6 +- .../Session + User Search.swift} | 22 ++--- .../ReadRedditCommentFile.swift | 0 .../{Reddit => Reddit API}/Session.swift | 0 .../{Reddit => Reddit API}/SessionError.swift | 0 .../readRedditSubmissionData.swift | 0 .../Reddit/Data Types/Listing.swift | 90 ------------------- .../Reddit API/Session Tests.swift | 49 ++++++++-- 25 files changed, 292 insertions(+), 142 deletions(-) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Authentication/AuthResponse.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Authentication/Session + Authentication.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/AnyDecodableValue.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit/Data Types/RedditComment.swift => Reddit API/Data Types/Comment.swift} (90%) create mode 100644 Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/Listing + Codable.swift create mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing.swift create mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/RedditCommentData.swift (89%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/RedditContainer.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/RedditDataItem.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/RedditSubmissionData.swift (88%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Data Types/RedditThread.swift (94%) rename Sources/SwiftNLP/1. Data Collection/{Reddit/Data Types/RedditSubmission.swift => Reddit API/Data Types/Submission.swift} (97%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Enumerations.swift (97%) rename Sources/SwiftNLP/1. Data Collection/{Reddit/Network Endpoints/RedditClient + Comment Search.swift => Reddit API/Network Endpoints/Session + Comment Search.swift} (91%) create mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift rename Sources/SwiftNLP/1. Data Collection/{Reddit/Network Endpoints/RedditClient + Subreddit Search.swift => Reddit API/Network Endpoints/Session + Subreddit Search.swift} (95%) rename Sources/SwiftNLP/1. Data Collection/{Reddit/Network Endpoints/RedditClient + User Search.swift => Reddit API/Network Endpoints/Session + User Search.swift} (96%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/ReadRedditCommentFile.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/Session.swift (100%) rename Sources/SwiftNLP/1. Data Collection/{Reddit => Reddit API}/SessionError.swift (100%) rename Sources/SwiftNLP/1. 
Data Collection/{Reddit => Reddit API}/readRedditSubmissionData.swift (100%) delete mode 100644 Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/Listing.swift diff --git a/Sources/SwiftNLP/1. Data Collection/CWorld.AI/Top 20k Subreddit Download.swift b/Sources/SwiftNLP/1. Data Collection/CWorld.AI/Top 20k Subreddit Download.swift index cf23de82..cb97a864 100644 --- a/Sources/SwiftNLP/1. Data Collection/CWorld.AI/Top 20k Subreddit Download.swift +++ b/Sources/SwiftNLP/1. Data Collection/CWorld.AI/Top 20k Subreddit Download.swift @@ -41,17 +41,17 @@ public func downloadSubredditFromServer(subreddit: String, source: String = "htt // Once we have submissions data, let _ = try await submissionsData debugPrint("Processing submission data...") - let (submissions, _ ): ([RedditSubmission],[Data]) = try await processRedditDataFile(submissionsData, verbose: verbose) // TODO: Figure out what to do with error data + let (submissions, _ ): ([Submission],[Data]) = try await processRedditDataFile(submissionsData, verbose: verbose) // TODO: Figure out what to do with error data for submission in submissions { // Create a new thread for each submission, index by submission ID - result[submission.id!] = RedditThread(submission: submission, comments: [RedditComment]()) + result[submission.id!] = RedditThread(submission: submission, comments: [Comment]()) } debugPrint("Completed processing submissions.") // Then fill in the comments once we have them.. let _ = try await commentsData debugPrint("Processing comments data...") - let (comments, _ ): ([RedditComment],[Data]) = try await processRedditDataFile(commentsData, verbose: verbose) // TODO: Figure out what to do with error data + let (comments, _ ): ([Comment],[Data]) = try await processRedditDataFile(commentsData, verbose: verbose) // TODO: Figure out what to do with error data for comment in comments { if var thread = result[comment.link_id!] 
{ thread.add(comment) @@ -70,9 +70,9 @@ public func downloadSubredditFromServer(subreddit: String, source: String = "htt // Load submission data -public func downloadSubmissionsFromServer(subreddit: String, source: String = "https://reddit-archive.cworld.ai/", verbose: Bool = false) async throws -> [RedditSubmission] { +public func downloadSubmissionsFromServer(subreddit: String, source: String = "https://reddit-archive.cworld.ai/", verbose: Bool = false) async throws -> [Submission] { - var result = [RedditSubmission]() + var result = [Submission]() let submissionsURL = source + subreddit + "_submissions.zst" debugPrint("Downloading \(submissionsURL)") @@ -86,7 +86,7 @@ public func downloadSubmissionsFromServer(subreddit: String, source: String = "h // Once we have submissions data, let _ = try await submissionsData debugPrint("Processing submission data...") - let (submissions, _ ): ([RedditSubmission],[Data]) = try await processRedditDataFile(submissionsData, verbose: verbose) // TODO: Figure out what to do with error data + let (submissions, _ ): ([Submission],[Data]) = try await processRedditDataFile(submissionsData, verbose: verbose) // TODO: Figure out what to do with error data debugPrint("Completed processing submissions.") result = submissions @@ -100,9 +100,9 @@ public func downloadSubmissionsFromServer(subreddit: String, source: String = "h } // Load comments data -public func downloadCommentsFromServer(subreddit: String, source: String = "https://reddit-archive.cworld.ai/", verbose: Bool = false) async throws -> [RedditComment] { +public func downloadCommentsFromServer(subreddit: String, source: String = "https://reddit-archive.cworld.ai/", verbose: Bool = false) async throws -> [Comment] { - var result = [RedditComment]() + var result = [Comment]() let commentsURL = source + subreddit + "_comments.zst" debugPrint("Downloading \(commentsURL)") @@ -116,7 +116,7 @@ public func downloadCommentsFromServer(subreddit: String, source: String = "http // Once 
we have submissions data, let _ = try await commentsData debugPrint("Processing submission data...") - let (comments, _ ): ([RedditComment],[Data]) = try await processRedditDataFile(commentsData, verbose: verbose) // TODO: Figure out what to do with error data + let (comments, _ ): ([Comment],[Data]) = try await processRedditDataFile(commentsData, verbose: verbose) // TODO: Figure out what to do with error data debugPrint("Completed processing comments.") result = comments diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Authentication/AuthResponse.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Authentication/AuthResponse.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Authentication/AuthResponse.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Authentication/AuthResponse.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Authentication/Session + Authentication.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Authentication/Session + Authentication.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Authentication/Session + Authentication.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Authentication/Session + Authentication.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/AnyDecodableValue.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/AnyDecodableValue.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/AnyDecodableValue.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/AnyDecodableValue.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditComment.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift similarity index 90% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditComment.swift rename to Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/Comment.swift index 038fef0e..2a06d694 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditComment.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Comment.swift @@ -21,7 +21,11 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -public struct RedditComment: RedditDataItem { +public struct Comment: RedditDataItem { +// public static func == (lhs: Comment, rhs: Comment) -> Bool { +// return lhs.id == rhs.id +// } + public let author: String? public let author_created_utc: Int32? public let author_flair_css_class: String? @@ -42,4 +46,5 @@ public struct RedditComment: RedditDataItem { public let score_hidden: Bool? public let subreddit: String? public let subreddit_id: String? + //let replies: Listing? } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift new file mode 100644 index 00000000..7ca1d6e2 --- /dev/null +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing + Codable.swift @@ -0,0 +1,58 @@ +// +// File.swift +// +// +// Created by Jim Wallace on 2023-12-19. 
+// + +import Foundation + +extension ListingDataItem { + + enum CodingKeys: String, CodingKey { + case kind + case data + } + + init(from decoder: Decoder) throws { + + let container = try decoder.container(keyedBy: CodingKeys.self) + kind = try container.decode(RedditContentType.self, forKey: .kind) + + switch kind { + case .comment: + data = try container.decode(Comment.self, forKey: .data) + + case .link: + data = try container.decode(Submission.self, forKey: .data) + + case .more: + //debugPrint("FOUND MORE") + data = try container.decode(RessidtListingMore.self, forKey: .data) + + default: + throw SessionError(message: "Unknown type of Reddit content from JSON.") + } + } + +// TODO: We don't actually need this, I'm being lazy and not implementing it + // Custom Encoder + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(kind, forKey: .kind) + +// switch kind { +// case .comment: +// guard let comment = data as? Comment else { +// throw EncodingError.invalidValue(data, .init(codingPath: encoder.codingPath, debugDescription: "Mismatched type for RedditComment")) +// } +// try container.encode(comment, forKey: .data) +// +// case .submission: +// guard let submission = data as? Submission else { +// throw EncodingError.invalidValue(data, .init(codingPath: encoder.codingPath, debugDescription: "Mismatched type for RedditSubmission")) +// } +// try container.encode(submission, forKey: .data) +// } + } +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Listing.swift new file mode 100644 index 00000000..4e3fbc8a --- /dev/null +++ b/Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/Listing.swift @@ -0,0 +1,43 @@ +// Copyright (c) 2023 Jim Wallace +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +import Foundation + +struct Listing: Codable { + let kind: String + let data: ListingData +} + +struct ListingData: Codable { + let after: String? + let dist: Int? + let modhash: String? + let before: String? + let geo_filter: String? + let children: [ListingDataItem] +} + +struct ListingDataItem: Codable { + let kind: RedditContentType + let data: any RedditDataItem +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/MoreComments.swift new file mode 100644 index 00000000..7c6000c3 --- /dev/null +++ b/Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/MoreComments.swift @@ -0,0 +1,35 @@ +// Copyright (c) 2023 Jim Wallace +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +struct RessidtListingMore: RedditDataItem { + + //let name: String? + //let kind: String? + + let parentId: String? + let count: Int? + let children: [String] // TODO: Is this the only field we need? + + var id: String? { return nil } + var created_utc: Int32? { return nil } // TODO: This is a hack that allows conformance to RedditDataItem ... fix later? +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditCommentData.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditCommentData.swift similarity index 89% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditCommentData.swift rename to Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/RedditCommentData.swift index 9c52610f..5d7649dc 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditCommentData.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditCommentData.swift @@ -22,12 +22,12 @@ // OTHER DEALINGS IN THE SOFTWARE. public struct RedditCommentData : RedditContainer { - public typealias ItemType = RedditComment + public typealias ItemType = Comment public var index: [String: Int32?] - public var posts: [RedditComment] + public var posts: [Comment] - public init(index: [String: Int32?], posts: [RedditComment]) { + public init(index: [String: Int32?], posts: [Comment]) { self.index = index self.posts = posts } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditContainer.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditContainer.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditContainer.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditContainer.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditDataItem.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditDataItem.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditDataItem.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmissionData.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditSubmissionData.swift similarity index 88% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmissionData.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditSubmissionData.swift index 9b0ffb8c..bb652e0b 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmissionData.swift +++ b/Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/RedditSubmissionData.swift @@ -22,12 +22,12 @@ // OTHER DEALINGS IN THE SOFTWARE. public struct RedditSubmissionData : RedditContainer { - public typealias ItemType = RedditSubmission + public typealias ItemType = Submission public var index: [String: Int32?] - public var posts: [RedditSubmission] + public var posts: [Submission] - public init(index: [String: Int32?], posts: [RedditSubmission]) { + public init(index: [String: Int32?], posts: [Submission]) { self.index = index self.posts = posts } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditThread.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditThread.swift similarity index 94% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditThread.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditThread.swift index e519d9ec..bab10797 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditThread.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/RedditThread.swift @@ -25,8 +25,8 @@ import Foundation public struct RedditThread: SNLPDataItem { - public let submission: RedditSubmission - public var comments: [RedditComment] + public let submission: Submission + public var comments: [Comment] public var id: String { return submission.id ?? "ERROR" } public var createdOn: Date { return Date(timeIntervalSince1970: TimeInterval(submission.created_utc ?? 0)) } @@ -61,7 +61,7 @@ extension RedditThread { return false } - public mutating func add(_ comment: RedditComment) { + public mutating func add(_ comment: Comment) { comments.append(comment) } } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmission.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift similarity index 97% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmission.swift rename to Sources/SwiftNLP/1. 
Data Collection/Reddit API/Data Types/Submission.swift index de7a0777..e62ce11f 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/RedditSubmission.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift @@ -21,7 +21,7 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -public struct RedditSubmission: RedditDataItem { +public struct Submission: RedditDataItem { public let author: String? public let author_flair_css_class: String? public let author_flair_text: String? @@ -44,7 +44,7 @@ public struct RedditSubmission: RedditDataItem { public let url: String? } -extension RedditSubmission { +extension Submission { /// Initializes a RedditSubission with whatever information is provided, makes all other fields nil public init(author: String? = nil, diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Enumerations.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Enumerations.swift similarity index 97% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Enumerations.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Enumerations.swift index 9f983f20..93af0308 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Enumerations.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Enumerations.swift @@ -28,6 +28,8 @@ enum RedditContentType: String, CustomStringConvertible, Codable { case message = "t4" case subreddit = "t5" case award = "t6" + case listing = "Listing" + case more = "more" var description: String { rawValue } } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Comment Search.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift similarity index 91% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Comment Search.swift rename to Sources/SwiftNLP/1. 
Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift index ed1f45dc..37bcd894 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Comment Search.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Comment Search.swift @@ -27,7 +27,7 @@ extension Session { /// Returns a comment tree corresponding to a search of the r/subreddit/comments/article endpoint func searchComment( - submission: RedditSubmission, + submission: Submission, comment: String? = nil, context: UInt = 0, depth: Int? = nil, @@ -41,7 +41,7 @@ extension Session { theme: RedditTheme = .light, threaded: Bool = false, truncate: UInt = 0 - ) async throws -> [RedditListing] { + ) async throws -> [Listing] { guard let subreddit = submission.subreddit, let articleID = submission.id else { @@ -85,14 +85,8 @@ extension Session { ) do { - let redditListing = try JSONDecoder().decode([RedditListing].self, from: data) - - for listing in redditListing { - print(listing) - } - - return redditListing - + return try JSONDecoder().decode([Listing].self, from: data) + //return redditListing } catch { throw SessionError(message: "Unable to decode server response.") } diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift new file mode 100644 index 00000000..dbbc8070 --- /dev/null +++ b/Sources/SwiftNLP/1. 
Data Collection/Reddit API/Network Endpoints/Session + MoreChildren.swift @@ -0,0 +1,68 @@ +// Copyright (c) 2023 Jim Wallace +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +import Foundation + +extension Session { + + /// Returns a comment tree corresponding to a search of the r/subreddit/comments/article endpoint + func moreChildren( + linkID: String, + children: [String], + id: String?, + depth: Int? 
= nil, + limitChildren: Bool = false, + sort: ListingSortOrder = .new + ) async throws -> [Listing] { + + + var parameters: [String : String] = [String:String]() + parameters["api_type"] = "json" + + if let depth = depth { + parameters["depth"] = String(depth) + } + + parameters["sort"] = sort.rawValue + + parameters["children"] = children.joined(separator: ", ") + parameters["link_id"] = linkID + parameters["limit_children"] = String(limitChildren).lowercased() + + if let id = id { + parameters["id"] = id + } + + let (data, _ ) = try await _GET( + endpoint: "api/morechildren", + parameters: parameters + ) + + do { + return try JSONDecoder().decode([Listing].self, from: data) + } catch { + throw SessionError(message: "Unable to decode server response.") + } + } + +} diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Subreddit Search.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Subreddit Search.swift similarity index 95% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Subreddit Search.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + Subreddit Search.swift index ada3a5c4..36c0772f 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + Subreddit Search.swift +++ b/Sources/SwiftNLP/1. 
Data Collection/Reddit API/Network Endpoints/Session + Subreddit Search.swift @@ -41,7 +41,7 @@ extension Session { time: ListingTime = .all, restrictSubreddit: Bool = true, type: Set<RedditContentType> = [.link] - ) async throws -> RedditListing { + ) async throws -> Listing { guard query.count < 512 else { throw SessionError(message: "Query length must be less than 512 characters.") @@ -86,8 +86,8 @@ extension Session { let (data, _) = try await _GET(endpoint: "r/\(subreddit)/search", parameters: parameters) do { - let redditListing = try JSONDecoder().decode(RedditListing.self, from: data) - return redditListing + return try JSONDecoder().decode(Listing.self, from: data) + //return redditListing } catch { throw SessionError(message: "Unable to decode server response.") diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + User Search.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + User Search.swift similarity index 96% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + User Search.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + User Search.swift index 7d5453a3..48c7b3e7 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Network Endpoints/RedditClient + User Search.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Network Endpoints/Session + User Search.swift @@ -38,7 +38,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("overview", userName: userName, context: context, @@ -66,7 +66,7 @@ extension Session { count: UInt = 0, limit: Int? 
= nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("submitted", userName: userName, context: context, @@ -94,7 +94,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("comments", userName: userName, context: context, @@ -122,7 +122,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("upvoted", userName: userName, context: context, @@ -150,7 +150,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("downvoted", userName: userName, context: context, @@ -178,7 +178,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("hidden", userName: userName, context: context, @@ -206,7 +206,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("saved", userName: userName, context: context, @@ -234,7 +234,7 @@ extension Session { count: UInt = 0, limit: Int? = nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { return try await _searchUserEndpoint("gilded", userName: userName, context: context, @@ -264,7 +264,7 @@ extension Session { count: UInt = 0, limit: Int? 
= nil, expandSubreddits: Bool = false - ) async throws -> RedditListing { + ) async throws -> Listing { var parameters: [String : String] = [String:String]() @@ -303,8 +303,8 @@ extension Session { ) do { - let redditListing = try JSONDecoder().decode(RedditListing.self, from: data) - return redditListing + return try JSONDecoder().decode(Listing.self, from: data) + //return redditListing } catch { throw SessionError(message: "Unable to decode server response.") diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/ReadRedditCommentFile.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/ReadRedditCommentFile.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/ReadRedditCommentFile.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/ReadRedditCommentFile.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Session.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/Session.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/Session.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/SessionError.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/SessionError.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/SessionError.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/SessionError.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/readRedditSubmissionData.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/readRedditSubmissionData.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Reddit/readRedditSubmissionData.swift rename to Sources/SwiftNLP/1. Data Collection/Reddit API/readRedditSubmissionData.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/Listing.swift b/Sources/SwiftNLP/1. 
Data Collection/Reddit/Data Types/Listing.swift deleted file mode 100644 index 6bae633f..00000000 --- a/Sources/SwiftNLP/1. Data Collection/Reddit/Data Types/Listing.swift +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2023 Jim Wallace -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -import Foundation - -struct RedditListing: Decodable { - let kind: String - let data: RedditListingData -} - -struct RedditListingData: Decodable { - let after: String? - let dist: Int? - let modhash: String? - let before: String? - let geo_filter: String? 
- let children: [RedditListingDataItem] -} - -struct RedditListingDataItem: Decodable { - let kind: RedditContentType - let data: any RedditDataItem -} - -extension RedditListingDataItem { - - enum CodingKeys: String, CodingKey { - case kind - case data - } - - init(from decoder: Decoder) throws { - - let container = try decoder.container(keyedBy: CodingKeys.self) - kind = try container.decode(RedditContentType.self, forKey: .kind) - - switch kind { - case .comment: - data = try container.decode(RedditComment.self, forKey: .data) - - case .link: - data = try container.decode(RedditSubmission.self, forKey: .data) - - default: - throw SessionError(message: "Failed to load reddit content from JSON.") - } - } -} - -//extension RedditListingDataItem { -// // Custom Encoder -// func encode(to encoder: Encoder) throws { -// var container = encoder.container(keyedBy: CodingKeys.self) -// try container.encode(kind, forKey: .kind) -// -// switch kind { -// case .comment: -// guard let comment = data as? RedditComment else { -// throw EncodingError.invalidValue(data, .init(codingPath: encoder.codingPath, debugDescription: "Mismatched type for RedditComment")) -// } -// try container.encode(comment, forKey: .data) -// -// case .submission: -// guard let submission = data as? 
RedditSubmission else { -// throw EncodingError.invalidValue(data, .init(codingPath: encoder.codingPath, debugDescription: "Mismatched type for RedditSubmission")) -// } -// try container.encode(submission, forKey: .data) -// } -// } -//} diff --git a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift b/Tests/SwiftNLPTests/Reddit API/Session Tests.swift index bd0a6ea4..c3892f01 100644 --- a/Tests/SwiftNLPTests/Reddit API/Session Tests.swift +++ b/Tests/SwiftNLPTests/Reddit API/Session Tests.swift @@ -12,7 +12,7 @@ import Foundation import XCTest @testable import SwiftNLP -final class RedditClientTest: XCTestCase { +final class RedditSessionTest: XCTestCase { func testHasRedditCredentials() throws { @@ -87,7 +87,7 @@ final class RedditClientTest: XCTestCase { throw SessionError(message: "Error authenticating client.") } - let result: RedditListing = try await client.searchSubreddit("uwaterloo", query: "goose", limit: 10) + let result: Listing = try await client.searchSubreddit("uwaterloo", query: "goose", limit: 10) // let r2: RedditListing = try await client.searchSubreddit("uwaterloo", query: "goose", limit: 10, type: [.comment] ) // for r in r2.children { @@ -112,7 +112,7 @@ final class RedditClientTest: XCTestCase { } // https://www.reddit.com/r/uwaterloo/comments/18lbokl/conestoga_college_finally_being_called_out_by_the/ - let submission = RedditSubmission(id: "18lbokl", subreddit: "uwaterloo") + let submission = Submission(id: "18lbokl", subreddit: "uwaterloo") // This should return an array of listings, one with original submisison and one with responses. let result = try await client.searchComment(submission: submission) @@ -122,6 +122,41 @@ final class RedditClientTest: XCTestCase { //XCTAssert(result[1].children.count > 0) } + func testCommentSearchWithMoreChildren() async throws { + + let id = ProcessInfo.processInfo.environment["REDDIT_CLIENT_ID"] ?? nil + let secret = ProcessInfo.processInfo.environment["REDDIT_CLIENT_SECRET"] ?? 
nil + + guard let id = id, let secret = secret else { + fatalError("Unable to fetch REDDIT_CLIENT_ID and REDDIT_CLIENT_SECRET from ProcessInfo.") + } + + let client = Session(id: id, secret: secret) + guard let _ = try? await client.authenticate() else { + throw SessionError(message: "Error authenticating client.") + } + + // 1) https://www.reddit.com/r/AmItheAsshole/comments/18m3xgr/aita_for_refusing_to_attend_my_inlaws_christmas/ + // 2) https://www.reddit.com/r/AskReddit/comments/7dljcy/serious_what_can_the_average_joe_do_to_save_net/.json + let submission = Submission(id: "7dljcy", subreddit: "AskReddit") + + // This should return an array of listings, one with original submission and one with responses. + // It's a big thread, so we *should* also get a `more` entry + let result = try await client.searchComment(submission: submission, showMore: true) + + for (i, listing) in result.enumerated() { + print("Listing \(i): \(listing.data.children.count) elements") + for item in listing.data.children { + if item.kind == .more { + print(item) + } + } + } + + XCTAssert(result.count > 0) + //XCTAssert(result[1].children.count > 0) + } + func testUserSearch() async throws { let id = ProcessInfo.processInfo.environment["REDDIT_CLIENT_ID"] ?? nil @@ -148,16 +183,16 @@ final class RedditClientTest: XCTestCase { //let upvotedResult = try await client.searchUserUpvoted(userName: "jimntonik") // TODO: 403 - Forbidden - requires login? //XCTAssert(upvotedResult.children.count > 0) - //let downvotedResult = try await client.searchUserDownvoted(userName: "jimntonik") // TODO: 403 - Forbidden + //let downvotedResult = try await client.searchUserDownvoted(userName: "jimntonik") // TODO: 403 - Forbidden - requires login?
//XCTAssert(downvotedResult.children.count > 0) - //let hiddenResult = try await client.searchUserHidden(userName: "jimntonik") // TODO: 403 - Forbidden + //let hiddenResult = try await client.searchUserHidden(userName: "jimntonik") // TODO: 403 - Forbidden - requires login? //XCTAssert(hiddenResult.children.count > 0) - //let savedResult = try await client.searchUserSaved(userName: "jimntonik") // TODO: 403 - Forbidden + //let savedResult = try await client.searchUserSaved(userName: "jimntonik") // TODO: 403 - Forbidden - requires login? //XCTAssert(savedResult.children.count > 0) - //let gildedResult = try await client.searchUserGilded(userName: "jimntonik") // TODO: 403 - Forbidden + //let gildedResult = try await client.searchUserGilded(userName: "jimntonik") // TODO: 403 - Forbidden - requires login? //XCTAssert(gildedResult.children.count > 0) } } -- GitLab