diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..a15b92d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +## [v2.1] - 2023-03-12 + +### Features +* Duplication order now uses path length when file names have the same length +* Exposing format-pattern and unknown-format-pattern as CLI parameters + +### Bug Fixes +* Fixed report when deleting duplicate files +* Ordering by file length too on the duplicate report given by the inspect command to make behaviour consistent + + +## [v2.0.1] - 2023-03-04 +### Features + +* breaking change - Complete rework of the application diff --git a/README.md b/README.md index 22a128d..18b7138 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,11 @@ This tool helps you to unify and organise your media files using your own rules. It also deals with duplicates, so you don't have to. +## Disclaimer + +Per design this command line interface tool deletes only duplicate files to potentially avoid any risk of losing data. + + ## Features * Move, Copy or delete duplicates operations @@ -34,26 +39,28 @@ It also deals with duplicates, so you don't have to. ```bash $ cataloguer --help - - Usage: cataloguer [OPTIONS] COMMAND [ARGS]... - - Command line interface. - All [OPTIONS] can be passed as environment variables with the "CATALOGUER_" prefix. - file arguments accept file names and a special value "-" to indicate stdin or stdout - -╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ --verbose -v Enables verbose mode. Disabled by default │ -│ --interactive/--no-interactive Disables confirmation prompts. Enabled by default │ -│ --help Show this message and exit. 
│ -╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Commands ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ -│ copy Copy files. In case of duplicates will take the shortest name. │ -│ create-catalogue Creates a new catalogue. │ -│ delete-catalogue Deletes a catalogue. No files are affected. │ -│ delete-duplicates Delete duplicates. │ -│ inspect Inspects a path or a catalogue │ -│ move Move files. In case of duplicates will take the shortest name. │ -╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + Usage: cataloguer [OPTIONS] COMMAND [ARGS]... + + Command line interface. + All [OPTIONS] can be passed as environment variables with the "CATALOGUER_" prefix. + file arguments accept file names and a special value "-" to indicate stdin or stdout + +╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --verbose -v Enables verbose mode. Disabled by default │ +│ --format-pattern TEXT Pattern template. e.g. %Y/%m/{file} │ +│ --unknown-format-pattern TEXT Pattern template fallback when date cannot get extracted │ +│ --interactive/--no-interactive Disables confirmation prompts. Enabled by default │ +│ --help Show this message and exit. 
│ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ copy Copy files. In case of duplicates will take the shortest name. │ +│ create-catalogue Creates a new catalogue. │ +│ delete-catalogue Deletes a catalogue. No files are affected. │ +│ delete-duplicates Delete duplicates. │ +│ inspect Inspects a path or a catalogue │ +│ move Move files. In case of duplicates will take the shortest name. │ +╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` @@ -66,11 +73,10 @@ We are going to start creating a new directory `media`: We are going to create a new catalogue named `local_photos` which is going to get store on the `media` directory. 
We specify our format pattern so photos are group by `year` and a subgroup of `month`: - export CATALOGUER_FORMAT_PATTERN=%Y/%m/{file} - cataloguer create-catalogue local_media --path ./media + cataloguer create-catalogue local_media ./media --format-pattern %Y/%m/{file} -Now, we add some photos from an old storage driver: +Now, we add some photos from an old storage drive: cataloguer copy /mnt/hdd/old_photos local_media @@ -92,7 +98,7 @@ To get a summary of our catalogue we run: ## Options -`CATALOGUER_FORMAT_PATTERN` accepts the following patterns +`format-pattern` accepts the following patterns * Common date codes: * `%d`: Day of the month as number * `%m`: Month as number @@ -103,18 +109,21 @@ To get a summary of our catalogue we run: * `/` Specifies a new folder * `{media_type}`: File type (`image`, `video`) * `{media_format}`: File format (`jpeg`, `tiff`, `png`, `gif`, `mp4` ...) -* `{file}` Original filename -* `{file_extension}` Original filename extension -* `{file_name}` Original filename without the extension +* `{file}` Original filename (`photo.jpg`) +* `{file_extension}` filename extension (`jpg`) +* `{file_name}` filename without the extension (`photo`) * `{relative_path}` Relative path to the source directory ### Advance usage: - -`CATALOGUER_UNKNOWN_PATH_FORMAT` Accepts the same variables as `CATALOGUER_FORMAT_PATTERN` but date patterns +`unknown-format-pattern` Accepts the same variables as `format-pattern` but date patterns are resolved using the current date since it was not possible to recover the creation date of the file. This can be useful to not leave files behind. +Variables can also be specified as environment variables, using a `CATALOGUER_` prefix. e.g.: + + export CATALOGUER_FORMAT_PATTERN=%Y/%m/{file} + `CATALOGUER_STORAGE_LOCATION` Accepts any path. That location will store cataloguer metadata. By default, it will create a `.catalogues` in the user's home directory. 
@@ -122,7 +131,7 @@ By default, it will create a `.catalogues` in the user's home directory. Pattern to fix file extensions keeping the folder structure: - CATALOGUER_FORMAT_PATTERN={relative_path}/{basename}.{media_format} cataloguer ./input ./output + cataloguer --format-pattern {relative_path}/{basename}.{media_format} move ./input ./output ## TODO list diff --git a/cataloguer/cli.py b/cataloguer/cli.py index 093776a..d8302e6 100644 --- a/cataloguer/cli.py +++ b/cataloguer/cli.py @@ -47,13 +47,15 @@ def __str__(self): help="Enables verbose mode. Disabled by default", default=False, ) +@click.option("--format-pattern", help='Pattern template. e.g. %Y/%m/{file}', required=False) +@click.option("--unknown-format-pattern", help='Pattern template fallback when date cannot get extracted', required=False) @click.option( "--interactive/--no-interactive", help="Disables confirmation prompts. Enabled by default", default=True, ) @click.pass_context -def cli(ctx, verbose, interactive): +def cli(ctx, verbose, interactive, format_pattern, unknown_format_pattern): """ Command line interface. 
@@ -62,7 +64,7 @@ def cli(ctx, verbose, interactive): file arguments accept file names and a special value "-" to indicate stdin or stdout """ if not ctx.obj: - global_settings = GlobalSettings() + global_settings = GlobalSettings(format_pattern=format_pattern, unknown_format_pattern=unknown_format_pattern) ctx.obj = Context( global_settings=global_settings, storage=Storage(path=global_settings.storage_location), @@ -110,8 +112,12 @@ def inspect(ctx: Context, src, media_only): print_table_summary(name=name, files=files, duplicated_files=duplicated_files) if duplicated_files: + duplicated_list_of_files_sorted_by_name_length = list( + sorted(duplicated_list, key=lambda file: (len(file.path.name), len(str(file.path)))) + for duplicated_list in duplicated_files + ) print_duplicate_files( - duplicated_files=duplicated_files, from_path=directory.path + duplicated_files=duplicated_list_of_files_sorted_by_name_length, from_path=directory.path ) if isinstance(directory, Catalogue): @@ -143,8 +149,10 @@ def delete_catalogue(ctx: Context, name): @cli.command() @click.argument("name") @click.argument("src", required=False) +@click.option("--format-pattern", help='Pattern template. e.g. %Y/%m/{file}', required=False) +@click.option("--unknown-format-pattern", help='Pattern template fallback when date cannot get extracted', required=False) @click.pass_obj -def create_catalogue(ctx: Context, name, src): +def create_catalogue(ctx: Context, name, src, format_pattern, unknown_format_pattern): """ Creates a new catalogue. 
""" @@ -155,6 +163,13 @@ def create_catalogue(ctx: Context, name, src): if not src_path or not src_path.is_dir(): raise click.BadParameter(f'Error "{src}" is not an existing path') + format_pattern = format_pattern or ctx.global_settings.format_pattern + unknown_format_pattern = unknown_format_pattern or ctx.global_settings.unknown_format_pattern + if not format_pattern: + raise click.BadParameter( + 'Error there is no format pattern specified' + ) + catalogue_path = src_path or ctx.workdir existing_catalogue = ctx.storage.load_catalogue(name) @@ -165,8 +180,8 @@ def create_catalogue(ctx: Context, name, src): new_catalogue = Catalogue( name=name, - format_pattern=ctx.global_settings.format_pattern, - unknown_format_pattern=ctx.global_settings.unknown_format_pattern, + format_pattern=format_pattern, + unknown_format_pattern=unknown_format_pattern, path=catalogue_path, ) new_catalogue.explore() @@ -261,7 +276,7 @@ def extract_files(src_data): if duplicated_list_of_files: duplicated_list_of_files_sorted_by_name_length = list( - sorted(duplicated_list, key=lambda file: len(file.path.name)) + sorted(duplicated_list, key=lambda file: (len(file.path.name), len(str(file.path)))) for duplicated_list in duplicated_list_of_files ) @@ -325,16 +340,29 @@ def operate(ctx, src, dst, operation_mode, dry_run=False): f'Error "{dst}" is neither a catalogue or an existing directory' ) - if isinstance(src_data, File) and ( - not dst_data or not isinstance(dst_data, (Catalogue, Directory)) - ): + if isinstance(src_data, (Catalogue, Directory)) and dst_data: + if src_data.path.is_relative_to(dst_data.path): + raise click.BadParameter( + f'Error "{dst}" cannot be a subdirectory of {src}' + ) + + if operation_mode in (Operation.MOVE, Operation.COPY): + format_pattern = ctx.global_settings.format_pattern + if dst_data and isinstance(dst_data, Catalogue): + format_pattern = format_pattern or dst_data.format_pattern + if not format_pattern: + raise click.BadParameter( + 'Error there is no 
format pattern specified' + ) + + if isinstance(src_data, File) and not dst_data: raise click.BadParameter( f'Error "{src}" is a file but no valid destination was provided' ) with console.status( f"[green]Inspecting files...", - ) as status: + ): ( duplicated_list_of_files_sorted_by_name_length, duplicated_discarded_files, @@ -447,8 +475,8 @@ def process_files( path_format = ctx.global_settings.format_pattern unknown_format_pattern = ctx.global_settings.unknown_format_pattern if isinstance(dst_data, Catalogue): - path_format = dst_data.format_pattern - unknown_format_pattern = dst_data.unknown_format_pattern + path_format = path_format or dst_data.format_pattern + unknown_format_pattern = unknown_format_pattern or dst_data.unknown_format_pattern tree = DirectoryTree() skipped_tree = DirectoryTree() diff --git a/cataloguer/settings.py b/cataloguer/settings.py index 003ee54..9646c1c 100644 --- a/cataloguer/settings.py +++ b/cataloguer/settings.py @@ -16,7 +16,7 @@ class GlobalSettings(BaseSettings): - format_pattern: str = "%Y/%m/%d/{file}" + format_pattern: Optional[str] = None unknown_format_pattern: Optional[str] = None storage_location: Path = Path.home().joinpath(".catalogues/") diff --git a/pyproject.toml b/pyproject.toml index 2892e14..23b34dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cataloguer" -version = "2.0.1" +version = "2.1" description = "Organize your media files" authors = ["Iago Veloso"] license = "MIT" diff --git a/tests/test_cli.py b/tests/test_cli.py index fe4b955..5a8294a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -49,10 +49,8 @@ def test_inspect(cli_runner, test_catalogue_path): def test_create_catalogue_and_adding_files( monkeypatch, cli_runner, test_catalogue_path ): - monkeypatch.setenv("CATALOGUER_FORMAT_PATTERN", "%Y/%m/%d/{file}") - result = invoke( - args=("create-catalogue", "test_catalogue", str(test_catalogue_path)), + args=("create-catalogue", "--format-pattern", 
"%Y/%m/%d/{file}", "test_catalogue", str(test_catalogue_path)), runner=cli_runner, ) assert result.exit_code == 0, result.output