From 43762051fb5220d8ca70b56a378b9cc50ee97c0e Mon Sep 17 00:00:00 2001
From: Jim Wallace <james.wallace@uwaterloo.ca>
Date: Fri, 22 Dec 2023 07:55:20 -0500
Subject: [PATCH] Removed SWCompression

---
 Package.resolved                              |  18 ---
 Package.swift                                 |   4 +-
 .../Legacy/20 Newsgroups.swift                | 116 ++++++++++--------
 3 files changed, 68 insertions(+), 70 deletions(-)

diff --git a/Package.resolved b/Package.resolved
index 349cccf2..aa687ba4 100644
--- a/Package.resolved
+++ b/Package.resolved
@@ -1,14 +1,5 @@
 {
   "pins" : [
-    {
-      "identity" : "bitbytedata",
-      "kind" : "remoteSourceControl",
-      "location" : "https://github.com/tsolomko/BitByteData",
-      "state" : {
-        "revision" : "b4b41619522aacd7aae7b02fa8360833e796a03d",
-        "version" : "2.0.2"
-      }
-    },
     {
       "identity" : "elva",
       "kind" : "remoteSourceControl",
@@ -36,15 +27,6 @@
         "version" : "2.3.2"
       }
     },
-    {
-      "identity" : "swcompression",
-      "kind" : "remoteSourceControl",
-      "location" : "https://github.com/tsolomko/SWCompression.git",
-      "state" : {
-        "revision" : "cd39ca0a3b269173bab06f68b182b72fa690765c",
-        "version" : "4.8.5"
-      }
-    },
     {
       "identity" : "swift-collections",
       "kind" : "remoteSourceControl",
diff --git a/Package.swift b/Package.swift
index c49981ed..3c1c268e 100644
--- a/Package.swift
+++ b/Package.swift
@@ -15,7 +15,7 @@ let package = Package(
     ],
     dependencies: [
         .package(url: "https://github.com/Jounce/Surge.git", .upToNextMajor(from: "2.3.2")),
-        .package(url: "https://github.com/tsolomko/SWCompression.git", .upToNextMajor(from: "4.8.5")),
+        //.package(url: "https://github.com/tsolomko/SWCompression.git", .upToNextMajor(from: "4.8.5")),
         .package(url: "https://github.com/jbadger3/SwiftAnnoy", .upToNextMajor(from: "1.0.0")),
         .package(url: "https://github.com/L1MeN9Yu/Elva", .upToNextMajor(from: "2.0.0")),
         //.package(url: "https://github.com/swiftcsv/SwiftCSV.git", from: "0.8.0"),
@@ -32,7 +32,7 @@ let package = Package(
             name: "SwiftNLP",
             dependencies: [
                 "Surge",
-                "SWCompression",
+                //"SWCompression",
                 "SwiftAnnoy",
                 .product(name: "ZSTD", package: "Elva"),
                 //"SwiftCSV",
diff --git a/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift b/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift
index 6135eb3b..852899d6 100644
--- a/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift	
+++ b/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift	
@@ -1,53 +1,69 @@
+// Copyright (c) 2023 Jim Wallace
 //
-//  File.swift
-//  
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
 //
-//  Created by Jim Wallace on 2023-05-29.
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
 //
-
-import Foundation
-import SWCompression
-
-@inlinable
-func downloadData(from url: URL) async throws -> Data {
-    let (data, _) = try await URLSession.shared.data(from: url)
-    return data
-}
-
-
-func download20Newsgroups() async -> [String] {
-    let url = URL(string: "http://qwone.com/~jason/20Newsgroups/20news-18828.tar.gz")!
-    
-    let result = Task { () -> [TarEntry]? in
-        
-        debugPrint("Downloading 20 Newsgroups dataset...")
-        let data = try await downloadData(from: url)
-        
-        //TODO: Decompression seems to be taking a lot of time right now? Different library?
-        debugPrint("Decompressing 20 Newsgroups dataset...")
-        let decompressedData = try? GzipArchive.unarchive(archive: data)
-        if let decompressedData = decompressedData {
-            let tarData = try? TarContainer.open(container: decompressedData)
-            return tarData
-        }
-        debugPrint("Unable to open TAR.")
-        return nil
-    }
-    
-    let tarData = try? await result.value
-    
-    var newsgroupData: [String] = [String]()
-    newsgroupData.reserveCapacity(tarData!.count)
-    
-    for entry in tarData! {
-        if let data = entry.data {
-            if let string = String(data: data, encoding: .ascii) {
-                newsgroupData.append(string)
-            } else {
-                debugPrint("\(entry.info.name) could not be converted to ASCII string.")
-            }
-        }
-    }
-    
-    return newsgroupData
-}
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
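+// NOTE: The 20 Newsgroups downloader below is disabled because it depends on SWCompression, which was removed from the package.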
+//import Foundation
+//import SWCompression
+//
+//@inlinable
+//func downloadData(from url: URL) async throws -> Data {
+//    let (data, _) = try await URLSession.shared.data(from: url)
+//    return data
+//}
+//
+//
+//func download20Newsgroups() async -> [String] {
+//    let url = URL(string: "http://qwone.com/~jason/20Newsgroups/20news-18828.tar.gz")!
+//    
+//    let result = Task { () -> [TarEntry]? in
+//        
+//        debugPrint("Downloading 20 Newsgroups dataset...")
+//        let data = try await downloadData(from: url)
+//        
+//        //TODO: Decompression seems to be taking a lot of time right now? Different library?
+//        debugPrint("Decompressing 20 Newsgroups dataset...")
+//        let decompressedData = try? GzipArchive.unarchive(archive: data)
+//        if let decompressedData = decompressedData {
+//            let tarData = try? TarContainer.open(container: decompressedData)
+//            return tarData
+//        }
+//        debugPrint("Unable to open TAR.")
+//        return nil
+//    }
+//    
+//    let tarData = try? await result.value
+//    
+//    var newsgroupData: [String] = [String]()
+//    newsgroupData.reserveCapacity(tarData!.count)
+//    
+//    for entry in tarData! {
+//        if let data = entry.data {
+//            if let string = String(data: data, encoding: .ascii) {
+//                newsgroupData.append(string)
+//            } else {
+//                debugPrint("\(entry.info.name) could not be converted to ASCII string.")
+//            }
+//        }
+//    }
+//    
+//    return newsgroupData
+//}
-- 
GitLab