Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bundle Command #14

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
677 changes: 549 additions & 128 deletions Cargo.lock

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.132"
toml = "0.8.19"
tokio = { version = "1.41.0", features = ["full"] }
reqwest = { version = "0.12.9", default-features = false, features = ["http2"] }
reqwest = { version = "0.12.9", default-features = false, features = ["http2", "blocking"] }
serde_yaml = "0.9.34"
regex = "1.11.1"
spdx = "0.10.6"
Expand All @@ -35,6 +35,13 @@ anyhow = "1.0.92"
clap-verbosity-flag = "2.2.2"
env_logger = "0.11.5"
log = "0.4.22"
tar = "0.4.42"
zip = "2.2.0"
zstd = "0.13.2"
bzip2 = "0.4.4"
indicatif = "0.17.8"
rayon = "1.10.0"
scopeguard = "1.2.0"

[dev-dependencies]
assert_cmd = "2.0.14"
Expand Down
3 changes: 2 additions & 1 deletion deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ allow = [
#"Apache-2.0 WITH LLVM-exception",
"BSD-2-Clause",
"BSD-3-Clause",
"BSL-1.0",
"MPL-2.0",
"ISC",
"Unicode-DFS-2016"
Expand Down Expand Up @@ -147,7 +148,7 @@ registries = [
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
[bans]
# Lint level for when multiple versions of the same crate are detected
multiple-versions = "warn"
multiple-versions = "allow"
# Lint level for when a crate version requirement is `*`
wildcards = "allow"
# The graph highlighting used when creating dotgraphs for crates
Expand Down
288 changes: 288 additions & 0 deletions src/bundle.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
use scopeguard::defer;
use std::fs::{self, create_dir_all, File};
use std::io::{self, copy, BufReader};
use std::path::{Path, PathBuf};

use bzip2::read::BzDecoder;

use anyhow::{Context, Result};
use log::{debug, warn};
use reqwest::blocking::get;
use tar::Archive;
use zip::ZipArchive;
use zstd::Decoder;

/// Name of a license file found in a package (or the placeholder name used when none is found).
type LicenseFileName = String;
/// Text content associated with a [`LicenseFileName`].
type LicenseText = String;

pub fn get_license_contents_for_package_url(url: &str) -> Result<Vec<(String, String)>> {
let file_name = Path::new(url)
.file_name()
.and_then(|name| name.to_str())
.ok_or_else(|| anyhow::anyhow!("Failed to extract file name from URL"))?;

let output_dir = Path::new(file_name)
.file_stem()
.and_then(|stem| stem.to_str())
.ok_or_else(|| anyhow::anyhow!("Failed to get file stem as str"))?;

download_file(url, file_name)?;
defer! {
let _ = fs::remove_file(file_name);
}
unpack_conda_file(file_name)?;
defer! {
let _ = fs::remove_dir_all(output_dir);
}
let license_strings = get_licenses_from_unpacked_conda_package(output_dir)?;

std::fs::remove_file(file_name)
.with_context(|| format!("Failed to delete file {}", file_name))?;
std::fs::remove_dir_all(output_dir)
.with_context(|| format!("Failed to remove directory {}", output_dir))?;

Ok(license_strings)
}

/// Returns every directory named `licenses` found below `root`.
///
/// The search itself is delegated to [`visit_dir`]; this wrapper only owns the
/// accumulator and hands it back once the traversal succeeded.
fn find_all_licenses_directories(root: &Path) -> Result<Vec<PathBuf>> {
    let mut found: Vec<PathBuf> = Vec::new();
    visit_dir(root, &mut found).map(|()| found)
}

/// Recursively walks `path` and appends every directory named `licenses` to
/// `licenses_dirs`.
///
/// A matching directory is recorded but not descended into, so a `licenses`
/// directory nested inside another `licenses` directory is not reported
/// separately. Regular files are ignored, whatever their name.
///
/// # Errors
///
/// Returns an error if a directory cannot be listed or an entry cannot be read.
fn visit_dir(path: &Path, licenses_dirs: &mut Vec<PathBuf>) -> Result<()> {
    for entry in fs::read_dir(path)? {
        let entry = entry?;
        let entry_path = entry.path();

        if !entry_path.is_dir() {
            continue;
        }

        // `DirEntry::file_name` always yields a name, so no `unwrap` is needed,
        // and the path can be moved into the result instead of cloned.
        if entry.file_name() == "licenses" {
            licenses_dirs.push(entry_path);
        } else {
            visit_dir(&entry_path, licenses_dirs)?;
        }
    }
    Ok(())
}

/// Gathers the license files of an already unpacked conda package.
///
/// Every `licenses` directory below `unpacked_conda_package_dir` is read
/// recursively. When no such directory exists, or none of them contains a
/// file, a warning is logged and a single placeholder entry is returned
/// instead. The result is sorted and free of exact duplicates.
///
/// # Errors
///
/// Returns an error if the package directory cannot be traversed or a license
/// file cannot be read.
fn get_licenses_from_unpacked_conda_package(
    unpacked_conda_package_dir: &str,
) -> Result<Vec<(LicenseFileName, LicenseText)>> {
    let candidate_dirs = find_all_licenses_directories(Path::new(unpacked_conda_package_dir))?;
    let has_licenses_dir = !candidate_dirs.is_empty();

    let mut collected = Vec::new();
    for dir in candidate_dirs {
        get_license_texts_for_dir(&dir, &mut collected).with_context(|| {
            format!(
                "Failed to get license content from {}. Does the licenses directory exist within the package?",
                dir.display()
            )
        })?;
    }

    // Nothing collected: either there was no directory to look into, or the
    // directories that exist hold no files. Only the log message differs.
    if collected.is_empty() {
        if has_licenses_dir {
            warn!(
                "Warning: No license files found in {}. Adding default license message.",
                unpacked_conda_package_dir
            );
        } else {
            warn!(
                "Warning: No 'info/licenses' directory found in {}. Adding default license message.",
                unpacked_conda_package_dir
            );
        }
        collected.push((
            String::from("NO LICENSE FOUND"),
            String::from("THE LICENSE OF THIS PACKAGE IS NOT PACKAGED!"),
        ));
    }

    collected.sort();
    collected.dedup();

    Ok(collected)
}

fn get_license_texts_for_dir(
path: &Path,
license_strings: &mut Vec<(LicenseFileName, LicenseText)>,
) -> Result<()> {
for entry in fs::read_dir(path)? {
let entry = entry?;
let entry_path = entry.path();

if entry_path.is_dir() {
get_license_texts_for_dir(&entry_path, license_strings)?;
} else {
let entry_file_name = entry.file_name().to_string_lossy().to_string();
let content = fs::read_to_string(&entry_path)
.with_context(|| format!("Failed to read {:?}", entry_path))?;
license_strings.push((entry_file_name, content));
}
}
Ok(())
}

fn download_file(url: &str, file_path: &str) -> Result<()> {
let response = get(url).with_context(|| format!("Failed to download {}", file_path))?;

if !response.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to download file: {}",
response.status()
));
}

let mut dest = File::create(file_path)
.with_context(|| format!("File at {} could not be created", file_path))?;
let content = response.bytes()?;

copy(&mut content.as_ref(), &mut dest)?;

debug!("File downloaded successfully to {}", file_path);
Ok(())
}

/// Unpacks the conda package at `file_path` into a directory named after the
/// file's stem.
///
/// The archive format is chosen from the file extension: `conda` archives and
/// `bz2` (i.e. `.tar.bz2`) archives are supported.
///
/// # Errors
///
/// Returns an error if `file_path` has no file stem, if the extension is not
/// supported, or if unpacking fails.
fn unpack_conda_file(file_path: &str) -> Result<()> {
    let archive_path = Path::new(file_path);

    // Reported as an error instead of panicking: the path is caller-provided.
    let output_dir = archive_path
        .file_stem()
        .map(PathBuf::from)
        .ok_or_else(|| anyhow::anyhow!("Failed to get file stem"))?;

    let file_extension = archive_path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or("");

    match file_extension {
        "conda" => unpack_conda_archive(file_path, &output_dir),
        "bz2" => unpack_tar_bz2_archive(file_path, &output_dir),
        other => Err(anyhow::anyhow!("Unsupported file extension: {}", other)),
    }
}

/// Unpacks a `.conda` archive into `output_dir`.
///
/// The file is opened as a zip archive; every member whose name ends in
/// `.tar.zst` is decompressed and extracted into `output_dir`. Other members
/// are skipped. `output_dir` is only created when at least one such member is
/// present.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or is not a valid zip
/// archive, if a member cannot be read or decompressed, or if `output_dir`
/// cannot be created or written to.
fn unpack_conda_archive(file_path: &str, output_dir: &Path) -> Result<()> {
    let conda_file =
        File::open(file_path).with_context(|| format!("Failed to open {}", file_path))?;
    let mut zip = ZipArchive::new(BufReader::new(conda_file))
        .with_context(|| "Failed to create zip archive")?;

    for i in 0..zip.len() {
        let mut member = zip.by_index(i)?;
        if !member.name().ends_with(".tar.zst") {
            continue;
        }

        let mut tar_zst_data = Vec::new();
        io::copy(&mut member, &mut tar_zst_data)?;

        // Let `tar` pull from the decoder directly so the decompressed archive
        // is never held in memory as a whole.
        let mut tar = Archive::new(Decoder::new(&tar_zst_data[..])?);

        create_dir_all(output_dir).with_context(|| {
            format!(
                "Failed to create directory {}",
                output_dir.to_string_lossy()
            )
        })?;
        tar.unpack(output_dir)
            .with_context(|| format!("Failed to unpack {}", output_dir.to_string_lossy()))?;
        debug!("Successfully unpacked to {:?}", output_dir);
    }
    Ok(())
}

fn unpack_tar_bz2_archive(file_path: &str, output_dir: &Path) -> Result<()> {
let tar_bz2_file =
File::open(file_path).with_context(|| format!("Failed to open {}", file_path))?;
let bz2_decoder = BzDecoder::new(tar_bz2_file);
let mut tar = Archive::new(bz2_decoder);

create_dir_all(output_dir).with_context(|| {
format!(
"Failed to create directory {}",
output_dir.to_string_lossy()
)
})?;
tar.unpack(output_dir)
.with_context(|| format!("Failed to unpack {}", output_dir.to_string_lossy()))?;
debug!("Successfully unpacked .tar.bz2 to {:?}", output_dir);

Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Unpacked fixture that ships license directories.
    const POLARIFY_DIR: &str = "tests/test_bundle_data/polarify-0.2.0-pyhd8ed1ab_0.conda";
    /// Unpacked fixture without any `licenses` directory.
    const LIBGCC_MUTEX_DIR: &str = "tests/test_bundle_data/_libgcc_mutex-0.1-free";

    /// The entry reported when a package ships no license.
    fn placeholder_entry() -> (LicenseFileName, LicenseText) {
        (
            "NO LICENSE FOUND".to_string(),
            "THE LICENSE OF THIS PACKAGE IS NOT PACKAGED!".to_string(),
        )
    }

    #[test]
    fn test_find_all_licenses_directories() {
        let polarify_root = Path::new(POLARIFY_DIR);
        let found = find_all_licenses_directories(polarify_root).unwrap();

        let dist_info_licenses = polarify_root.join(
            "pkg-polarify-0.2.0-pyhd8ed1ab_0/site-packages/polarify-0.2.0.dist-info/licenses",
        );
        let info_licenses = polarify_root.join("pkg-polarify-0.2.0-pyhd8ed1ab_0/info/licenses");

        assert_eq!(found.len(), 2);
        assert!(found.contains(&dist_info_licenses));
        assert!(found.contains(&info_licenses));

        let found = find_all_licenses_directories(Path::new(LIBGCC_MUTEX_DIR)).unwrap();

        assert!(found.is_empty());
    }

    #[test]
    fn test_get_licenses_from_unpacked_conda_package_with_license_files() {
        let licenses = get_licenses_from_unpacked_conda_package(POLARIFY_DIR).unwrap();

        assert_eq!(
            licenses,
            vec![(
                String::from("LICENSE"),
                String::from("This is the license.")
            )]
        );
    }

    #[test]
    fn test_get_licenses_from_unpacked_conda_package_without_licenses_directory() {
        let licenses = get_licenses_from_unpacked_conda_package(LIBGCC_MUTEX_DIR).unwrap();

        assert_eq!(licenses, vec![placeholder_entry()]);
    }

    #[test]
    fn test_get_licenses_from_unpacked_conda_package_empty_licenses_dir() {
        let package_dir = "tests/test_bundle_data/empty_licenses_dir";

        fs::create_dir_all(Path::new(package_dir).join("licenses")).unwrap();

        let licenses = get_licenses_from_unpacked_conda_package(package_dir).unwrap();

        // Remove the scratch directory before asserting so a failed assertion
        // does not leave it behind.
        fs::remove_dir_all(package_dir).unwrap();

        assert_eq!(licenses, vec![placeholder_entry()]);
    }
}
27 changes: 27 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ use clap_verbosity_flag::{ErrorLevel, Verbosity};

use clap::Parser;

use crate::conda_deny_config::CondaDenyConfig;

/// Platform entries, as combined from the config file and the command line.
type Platforms = Vec<String>;
/// Lockfile entries, as combined from the config file and the command line.
type Lockfiles = Vec<String>;
/// Environment entries, as combined from the config file and the command line.
type Environments = Vec<String>;

#[derive(Parser, Debug)]
#[command(name = "conda-deny", about = "Check and list licenses of pixi and conda environments", version = env!("CARGO_PKG_VERSION"))]
pub struct Cli {
Expand Down Expand Up @@ -38,6 +44,27 @@ pub enum Commands {
osi: bool,
},
List {},
Bundle {
#[arg(short, long)]
output: Option<String>,
},
}

/// Merges the lockfile, platform and environment settings from the config
/// file with those given on the command line.
///
/// For each of the three lists the values from `config` come first and the
/// CLI values are appended after them; nothing is deduplicated. A platform or
/// environment spec that is absent from the config contributes no entries.
///
/// Returns the lists in the order `(lockfiles, platforms, environments)`.
pub fn combine_cli_and_config_input(
    config: &CondaDenyConfig,
    cli_lockfiles: &[String],
    cli_platforms: &[String],
    cli_environments: &[String],
) -> (Lockfiles, Platforms, Environments) {
    let mut platforms = config.get_platform_spec().unwrap_or_default();
    let mut lockfiles = config.get_lockfile_spec();
    let mut environment_specs = config.get_environment_spec().unwrap_or_default();

    // Clones element by element instead of first copying each slice into a
    // temporary `Vec`.
    platforms.extend_from_slice(cli_platforms);
    lockfiles.extend_from_slice(cli_lockfiles);
    environment_specs.extend_from_slice(cli_environments);

    (lockfiles, platforms, environment_specs)
}

#[cfg(test)]
Expand Down
6 changes: 3 additions & 3 deletions src/conda_deny_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ use serde::Deserialize;
use std::vec;
use std::{fs::File, io::Read};

#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct CondaDenyConfig {
tool: Tool,
#[serde(skip)]
pub path: String,
}

#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct Tool {
#[serde(rename = "conda-deny")]
conda_deny: CondaDeny,
Expand Down Expand Up @@ -51,7 +51,7 @@ struct PixiEnvironmentEntry {
_environments: Vec<String>,
}

#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, Clone)]
pub struct CondaDeny {
#[serde(rename = "license-whitelist")]
license_whitelist: Option<LicenseWhitelist>,
Expand Down
Loading