diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift API/ReadRedditCommentFile.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift API/ReadRedditCommentFile.swift deleted file mode 100644 index 2d83b6f8c3db359995a429a9832b5918fe1b8cd3..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/1. Data Collection/Pushshift API/ReadRedditCommentFile.swift +++ /dev/null @@ -1,31 +0,0 @@ -import Foundation - -func readRedditCommentFile(fileName: String) -> RedditCommentData? - { - var json: RedditCommentData? - if let path = Bundle.main.path(forResource: fileName, ofType: "json") { - do { - let fileUrl = URL(fileURLWithPath: path) - // Getting data from JSON file using the file URL - let data = try Data(contentsOf: fileUrl, options: .mappedIfSafe) - //json = try? JSONSerialization.jsonObject(with: data) - - json = try JSONDecoder().decode(RedditCommentData?.self, from: data) - } catch { - // Handle error here - print("Unexpected error: \(error).") - } - } - return json - } - - -func readRedditCommentJson(json: Data) -> RedditCommentData? { - do { - let commentData = try JSONDecoder().decode(RedditCommentData.self, from: json) - return commentData - } catch { - print("Error while decoding reddit comment file: \(error)") - return nil - } -} diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift API/readRedditSubmissionData.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift API/readRedditSubmissionData.swift deleted file mode 100644 index 4da2d138faf8a43c1011816d89275ee59f0a3e01..0000000000000000000000000000000000000000 --- a/Sources/SwiftNLP/1. Data Collection/Pushshift API/readRedditSubmissionData.swift +++ /dev/null @@ -1,30 +0,0 @@ -//func readRedditSubmissionFile(fileName: String) -> RedditSubmissionData? -// { -// var json: RedditSubmissionData? -// if let path = Bundle.main.path(forResource: fileName, ofType: "json") { -// do { -// let fileUrl = URL(fileURLWithPath: path) -// // Getting data from JSON file using the file URL -// let data = try Data(contentsOf: fileUrl, options: .mappedIfSafe) -// //json = try? JSONSerialization.jsonObject(with: data) -// -// json = try JSONDecoder().decode(RedditSubmissionData?.self, from: data) -// } catch { -// // Handle error here -// print("Unexpected error: \(error).") -// } -// } -// return json -// } - -import Foundation - -func readRedditSubmissionJson(json: Data) -> RedditSubmissionData? { - do { - let submissionData = try JSONDecoder().decode(RedditSubmissionData.self, from: json) - return submissionData - } catch { - print("Error while decoding reddit submission file: \(error)") - return nil - } -} diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditCommentData.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditCommentData.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditCommentData.swift rename to Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditCommentData.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditContainer.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditContainer.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditContainer.swift rename to Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditContainer.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditSubmissionData.swift b/Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditSubmissionData.swift similarity index 100% rename from Sources/SwiftNLP/1. Data Collection/Pushshift API/RedditSubmissionData.swift rename to Sources/SwiftNLP/1. Data Collection/Pushshift Archives/RedditSubmissionData.swift diff --git a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift index 614f712e061d6d62dbbe0e20e2f8c8e589539db6..65de977415871e016bf9739dae40dd1f73aec0a8 100644 --- a/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift +++ b/Sources/SwiftNLP/1. Data Collection/Reddit API/Data Types/Submission.swift @@ -45,6 +45,30 @@ public struct Submission: RedditDataItem { public let url: String? public var linkID: String { "\(RedditContentType.link)_\(id!)"} + + enum CodingKeys: String, CodingKey { + case author = "author" + case author_flair_css_class = "author_flair_css_class" + case author_flair_text = "author_flair_text" + case created_utc = "created_utc" + case domain = "domain" + case full_link = "full_link" + case id = "id" + case is_self = "is_self" + case media_embed = "media_embed" + case num_comments = "num_comments" + case over_18 = "over_18" + case permalink = "permalink" + case score = "score" + case hide_score = "hide_score" + case selftext = "selftext" + case subreddit = "subreddit" + case subreddit_id = "subreddit_id" + case thumbnail = "thumbnail" + case title = "title" + case url = "url" + } + } extension Submission { diff --git a/Tests/SwiftNLPTests/SNLPBoWDictionaryTests.swift b/Tests/SwiftNLPTests/SNLPBoWDictionaryTests.swift index ce5c266d1760700f02bd7fcb9c2648f158308dca..d676b656b1aef78f27946ad8874ecc302a4d7ce8 100644 --- a/Tests/SwiftNLPTests/SNLPBoWDictionaryTests.swift +++ b/Tests/SwiftNLPTests/SNLPBoWDictionaryTests.swift @@ -60,8 +60,8 @@ final class SwiftNLPBoWDictionaryTests: XCTestCase { let redditCommentJson = TestUtils.loadAllRedditComment() let redditSubmissionJson = TestUtils.loadAllRedditSubmission() - let redditComments = redditCommentJson.compactMap { readRedditCommentJson(json: $0) } - let redditSubmissions = redditSubmissionJson.compactMap { readRedditSubmissionJson(json: $0) } + let redditComments = redditCommentJson.compactMap { TestUtils.readRedditCommentJson(json: $0) } + let redditSubmissions = redditSubmissionJson.compactMap { TestUtils.readRedditSubmissionJson(json: $0) } // Extract body and selftext from each post, and store that for our corpus let bodies = redditComments.flatMap { $0.posts.compactMap { $0.body } } + diff --git a/Tests/SwiftNLPTests/SNLPLoadDataTests.swift b/Tests/SwiftNLPTests/SNLPLoadDataTests.swift index 4a057031980873e43fcdce3fcf4d688db6f8f644..9e39c1c5b6be02d99c36f3cde415d258fcd3f006 100644 --- a/Tests/SwiftNLPTests/SNLPLoadDataTests.swift +++ b/Tests/SwiftNLPTests/SNLPLoadDataTests.swift @@ -13,7 +13,7 @@ final class SwiftNLPLoadDataTests: XCTestCase { func testRedditSubmissions() throws { let redditSubmissionJson = TestUtils.loadAllRedditSubmission() for jsonData in redditSubmissionJson { - let redditSubmission = readRedditSubmissionJson(json: jsonData) + let redditSubmission = TestUtils.readRedditSubmissionJson(json: jsonData) XCTAssertNotNil(redditSubmission, "Failed to decode RedditSubmissionData") } } @@ -22,7 +22,7 @@ final class SwiftNLPLoadDataTests: XCTestCase { func testRedditComments() throws { let redditCommentJson = TestUtils.loadAllRedditComment() for jsonData in redditCommentJson { - let redditComment = readRedditCommentJson(json: jsonData) + let redditComment = TestUtils.readRedditCommentJson(json: jsonData) XCTAssertNotNil(redditComment, "Failed to decode RedditCommentData") } } @@ -44,8 +44,8 @@ final class SwiftNLPLoadDataTests: XCTestCase { let redditCommentJson = TestUtils.loadAllRedditComment() let redditSubmissionJson = TestUtils.loadAllRedditSubmission() - let redditComments = redditCommentJson.compactMap { readRedditCommentJson(json: $0) } - let redditSubmissions = redditSubmissionJson.compactMap { readRedditSubmissionJson(json: $0) } + let redditComments = redditCommentJson.compactMap { TestUtils.readRedditCommentJson(json: $0) } + let redditSubmissions = redditSubmissionJson.compactMap { TestUtils.readRedditSubmissionJson(json: $0) } // Extract body and selftext from each post, and store that for our corpus let bodies = redditComments.flatMap { $0.posts.compactMap { $0.body } } + diff --git a/Tests/SwiftNLPTests/SwiftAnnoyTest.swift b/Tests/SwiftNLPTests/SwiftAnnoyTest.swift index 87acf0edd49fd07077105f9618ff155b79dc260a..39b249a3d9092d3fecb5ae0aa1262c903f03df9b 100644 --- a/Tests/SwiftNLPTests/SwiftAnnoyTest.swift +++ b/Tests/SwiftNLPTests/SwiftAnnoyTest.swift @@ -34,7 +34,7 @@ final class SwiftAnnoyTest: XCTestCase { let dictionary = corpus.dictionary - let (myIndex,myMap) = dictionary.AnnoyTest() + let (myIndex,_) = dictionary.AnnoyTest() var frog = dictionary["frog"] @@ -44,7 +44,7 @@ final class SwiftAnnoyTest: XCTestCase { // Change the number of neighbors below - let (ids, distances) = myIndex.getNNsForVector(vector: &frog, neighbors: 10)! + let (_, _) = myIndex.getNNsForVector(vector: &frog, neighbors: 10)! // for (id, distance) in zip(ids, distances) { // debugPrint("\(myMap[id]!) was \(distance)") diff --git a/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift b/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift index d1a23e99e9f1e37ec72b03ab97fc0b37b70e2346..770d52de2b72400a0fde878c1ef3454781109fef 100644 --- a/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift +++ b/Tests/SwiftNLPTests/TestUtil/LoadTestJson.swift @@ -6,6 +6,7 @@ // import Foundation +import SwiftNLP // the goal of this util is to fetch and load json files into the Data class // which can then be used directly by the package to convert into objects @@ -71,4 +72,24 @@ class TestUtils { static func loadAllRedditSubmission() -> [Data] { return loadAllFiles(prefix: "RS") } + + static func readRedditCommentJson(json: Data) -> RedditCommentData? { + do { + let commentData = try JSONDecoder().decode(RedditCommentData.self, from: json) + return commentData + } catch { + print("Error while decoding reddit comment file: \(error)") + return nil + } + } + + static func readRedditSubmissionJson(json: Data) -> RedditSubmissionData? { + do { + let submissionData = try JSONDecoder().decode(RedditSubmissionData.self, from: json) + return submissionData + } catch { + print("Error while decoding reddit submission file: \(error)") + return nil + } + } }