From 43762051fb5220d8ca70b56a378b9cc50ee97c0e Mon Sep 17 00:00:00 2001 From: Jim Wallace <james.wallace@uwaterloo.ca> Date: Fri, 22 Dec 2023 07:55:20 -0500 Subject: [PATCH] Removed SWCompression --- Package.resolved | 18 --- Package.swift | 4 +- .../Legacy/20 Newsgroups.swift | 116 ++++++++++-------- 3 files changed, 68 insertions(+), 70 deletions(-) diff --git a/Package.resolved b/Package.resolved index 349cccf2..aa687ba4 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,14 +1,5 @@ { "pins" : [ - { - "identity" : "bitbytedata", - "kind" : "remoteSourceControl", - "location" : "https://github.com/tsolomko/BitByteData", - "state" : { - "revision" : "b4b41619522aacd7aae7b02fa8360833e796a03d", - "version" : "2.0.2" - } - }, { "identity" : "elva", "kind" : "remoteSourceControl", @@ -36,15 +27,6 @@ "version" : "2.3.2" } }, - { - "identity" : "swcompression", - "kind" : "remoteSourceControl", - "location" : "https://github.com/tsolomko/SWCompression.git", - "state" : { - "revision" : "cd39ca0a3b269173bab06f68b182b72fa690765c", - "version" : "4.8.5" - } - }, { "identity" : "swift-collections", "kind" : "remoteSourceControl", diff --git a/Package.swift b/Package.swift index c49981ed..3c1c268e 100644 --- a/Package.swift +++ b/Package.swift @@ -15,7 +15,7 @@ let package = Package( ], dependencies: [ .package(url: "https://github.com/Jounce/Surge.git", .upToNextMajor(from: "2.3.2")), - .package(url: "https://github.com/tsolomko/SWCompression.git", .upToNextMajor(from: "4.8.5")), + //.package(url: "https://github.com/tsolomko/SWCompression.git", .upToNextMajor(from: "4.8.5")), .package(url: "https://github.com/jbadger3/SwiftAnnoy", .upToNextMajor(from: "1.0.0")), .package(url: "https://github.com/L1MeN9Yu/Elva", .upToNextMajor(from: "2.0.0")), //.package(url: "https://github.com/swiftcsv/SwiftCSV.git", from: "0.8.0"), @@ -32,7 +32,7 @@ let package = Package( name: "SwiftNLP", dependencies: [ "Surge", - "SWCompression", + //"SWCompression", "SwiftAnnoy", .product(name: "ZSTD", package: "Elva"), //"SwiftCSV", diff --git a/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift b/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift index 6135eb3b..852899d6 100644 --- a/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift +++ b/Sources/SwiftNLP/1. Data Collection/Legacy/20 Newsgroups.swift @@ -1,53 +1,69 @@ +// Copyright (c) 2023 Jim Wallace // -// File.swift -// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: // -// Created by Jim Wallace on 2023-05-29. +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. // - -import Foundation -import SWCompression - -@inlinable -func downloadData(from url: URL) async throws -> Data { - let (data, _) = try await URLSession.shared.data(from: url) - return data -} - - -func download20Newsgroups() async -> [String] { - let url = URL(string: "http://qwone.com/~jason/20Newsgroups/20news-18828.tar.gz")! - - let result = Task { () -> [TarEntry]? in - - debugPrint("Downloading 20 Newsgroups dataset...") - let data = try await downloadData(from: url) - - //TODO: Decompression seems to be taking a lot of time right now? Different library? - debugPrint("Decompressing 20 Newsgroups dataset...") - let decompressedData = try? GzipArchive.unarchive(archive: data) - if let decompressedData = decompressedData { - let tarData = try? TarContainer.open(container: decompressedData) - return tarData - } - debugPrint("Unable to open TAR.") - return nil - } - - let tarData = try? await result.value - - var newsgroupData: [String] = [String]() - newsgroupData.reserveCapacity(tarData!.count) - - for entry in tarData! { - if let data = entry.data { - if let string = String(data: data, encoding: .ascii) { - newsgroupData.append(string) - } else { - debugPrint("\(entry.info.name) could not be converted to ASCII string.") - } - } - } - - return newsgroupData -} +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// +//import Foundation +//import SWCompression +// +//@inlinable +//func downloadData(from url: URL) async throws -> Data { +// let (data, _) = try await URLSession.shared.data(from: url) +// return data +//} +// +// +//func download20Newsgroups() async -> [String] { +// let url = URL(string: "http://qwone.com/~jason/20Newsgroups/20news-18828.tar.gz")! +// +// let result = Task { () -> [TarEntry]? in +// +// debugPrint("Downloading 20 Newsgroups dataset...") +// let data = try await downloadData(from: url) +// +// //TODO: Decompression seems to be taking a lot of time right now? Different library? +// debugPrint("Decompressing 20 Newsgroups dataset...") +// let decompressedData = try? GzipArchive.unarchive(archive: data) +// if let decompressedData = decompressedData { +// let tarData = try? TarContainer.open(container: decompressedData) +// return tarData +// } +// debugPrint("Unable to open TAR.") +// return nil +// } +// +// let tarData = try? await result.value +// +// var newsgroupData: [String] = [String]() +// newsgroupData.reserveCapacity(tarData!.count) +// +// for entry in tarData! { +// if let data = entry.data { +// if let string = String(data: data, encoding: .ascii) { +// newsgroupData.append(string) +// } else { +// debugPrint("\(entry.info.name) could not be converted to ASCII string.") +// } +// } +// } +// +// return newsgroupData +//} -- GitLab