-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrename.py
285 lines (239 loc) · 10.8 KB
/
rename.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
import sys
import os
import re
from app_logging import app_logger
from helpers import is_hidden
# -------------------------------------------------------------------
# Volume + issue pattern (case-insensitive):
#   title, whitespace, volume token ("v" + 1-3 digits), whitespace,
#   issue number (1-3 digits), any trailing text, then the extension.
# The title group is lazy, so it ends at the first place the rest of
# the pattern can match.
# Example: "Comic Name v3 051 (2018) (DCP-Scan Final).cbz"
# Group(1) => "Comic Name"
# Group(2) => "v3"
# Group(3) => "051"
# Group(4) => " (2018) (DCP-Scan Final)"
# Group(5) => ".cbz"
# -------------------------------------------------------------------
VOLUME_ISSUE_PATTERN = re.compile(
r'^(.*?)\s+(v\d{1,3})\s+(\d{1,3})(.*)(\.\w+)$',
re.IGNORECASE
)
# -------------------------------------------------------------------
# Single-token issue pattern (case-insensitive):
#   title, whitespace, then EITHER "v" + 1-3 digits OR 1-3 digits
#   (the token must end on a word boundary), any trailing text,
#   then the extension.
# e.g. "Comic Name 051 (2018).cbz" or "Comic Name v3 (2022).cbr"
# Group(1) => title            (e.g. "Comic Name")
# Group(2) => issue/volume     (e.g. "051" or "v3")
# Group(3) => trailing text    (e.g. " (2018)")
# Group(4) => extension        (e.g. ".cbz")
# Because of the word boundary, a 4-digit number such as "2099" is
# not taken as an issue number.
# -------------------------------------------------------------------
ISSUE_PATTERN = re.compile(
r'^(.*?)\s+((?:v\d{1,3})|(?:\d{1,3}))\b(.*)(\.\w+)$',
re.IGNORECASE
)
# -------------------------------------------------------------------
# Pattern for names where the issue number comes after the year
# (case-insensitive):
#   title, optional whitespace, "(YYYY)", optional whitespace,
#   "#" + 1-3 digits, any trailing text, then the extension.
# e.g. "Spider-Man 2099 (1992) #44 (digital) (Colecionadores.GO).cbz"
# Group(1) => Title (e.g. "Spider-Man 2099")
# Group(2) => Year (e.g. "1992")
# Group(3) => Issue number, including the "#" (e.g. "#44")
# Group(4) => Extra text (captured but not used by get_renamed_filename)
# Group(5) => Extension (e.g. ".cbz")
# -------------------------------------------------------------------
ISSUE_AFTER_YEAR_PATTERN = re.compile(
r'^(.*?)\s*\((\d{4})\)\s*(#\d{1,3})(.*)(\.\w+)$',
re.IGNORECASE
)
# -------------------------------------------------------------------
# Fallback for "Title (YYYY) anything .ext" (case-insensitive):
#   title, "(YYYY)", any trailing text, then the extension.
# e.g. "Comic Name (2018) some extra.cbz" -> "Comic Name (2018).cbz"
# Group(1) => title, up to the first "(YYYY)" (may end with a space)
# Group(2) => the 4-digit year
# Group(3) => trailing text (captured but discarded by the caller)
# Group(4) => extension
# -------------------------------------------------------------------
FALLBACK_PATTERN = re.compile(
r'^(.*?)\((\d{4})\)(.*)(\.\w+)$',
re.IGNORECASE
)
def parentheses_replacer(match):
    """
    Substitution callback for one parenthesised group.

    `match` must cover the whole group, including the outer "(" and ")".

    Returns:
        "(YYYY)" built from the first run of four consecutive digits found
        between the parentheses, or an empty string when there is none
        (which deletes the group). No digit boundary is enforced, so for a
        longer digit run the first four digits are used.
    """
    # Drop the surrounding "(" and ")" before searching.
    contents = match.group(0)[1:-1]
    digits = re.search(r'\d{4}', contents)
    return f"({digits.group(0)})" if digits else ''
def clean_filename_pre(filename):
    """
    Normalise a raw name before the rename patterns are tried.

    Steps, applied in this order:
      1) Underscores become spaces.
      2) Anything in [brackets] is dropped.
      3) Every (parenthesised) group goes through parentheses_replacer:
         it is reduced to "(YYYY)" when it holds four consecutive digits,
         otherwise it is removed.
      4) Dash-separated numbers:
         - 'YYYY-XX' and 'YYYY-YYYY' collapse to the leading 'YYYY'.
         - Any other dash-separated numbers (e.g. '01-05') are removed.
      5) " - Issue" is removed (case-insensitive).
      6) Runs of whitespace collapse to one space; the ends are trimmed.

    Returns the cleaned string.
    """
    text = filename.replace('_', ' ')

    # Ordered (pattern, replacement) pairs; order matters because the
    # year-preserving rules must run before the generic number removal.
    substitutions = (
        (r'\[.*?\]', ''),                      # [bracketed text]
        (r'\([^)]*\)', parentheses_replacer),  # (groups): keep only a year
        (r'\b(\d{4})-\d{2}\b', r'\1'),         # "2018-04"   -> "2018"
        (r'\b(\d{4})-\d{4}\b', r'\1'),         # "1989-1990" -> "1989"
        (r'\b\d+(?:-\d+)+\b', ''),             # "01-05"     -> ""
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    text = re.sub(r'\s*-\s*Issue\b', '', text, flags=re.IGNORECASE)

    # The removals above can leave doubled or dangling spaces behind.
    return re.sub(r'\s+', ' ', text).strip()
def clean_directory_name(directory_name):
    """
    Clean a directory name with exactly the same rules as a filename.

    This is a thin wrapper: `directory_name` is handed to
    clean_filename_pre() unchanged, so the result has
      1) underscores turned into spaces,
      2) [bracketed] text removed,
      3) parentheses reduced to "(YYYY)" or removed,
      4) dash-separated numbers collapsed to a year or removed,
      5) " - Issue" removed,
      6) whitespace collapsed and trimmed.
    """
    cleaned_name = clean_filename_pre(directory_name)
    return cleaned_name
def _year_suffix(text):
    """Return " (YYYY)" for the first standalone 4-digit number inside a "(...)" group of `text`, else ""."""
    for group_text in re.findall(r'\(([^)]*)\)', text):
        year_match = re.search(r'\b(\d{4})\b', group_text)
        if year_match:
            return f" ({year_match.group(1)})"
    return ""


def get_renamed_filename(filename):
    """
    Compute the normalised name for a single filename (no directory path).

    The filename is first pre-cleaned with clean_filename_pre() (brackets
    removed, parentheses reduced to years, dash-separated numbers removed,
    underscores turned into spaces). The patterns are then tried in order
    and the first match wins:
      1) VOLUME_ISSUE_PATTERN      -> "Title vN NNN (YYYY).ext"
      2) ISSUE_PATTERN             -> "Title NNN (YYYY).ext" / "Title vN (YYYY).ext"
      3) ISSUE_AFTER_YEAR_PATTERN  -> "Title #NN (YYYY).ext"
      4) FALLBACK_PATTERN          -> "Title (YYYY).ext"
    For 1) and 2) the " (YYYY)" part is omitted when no year is found.

    Args:
        filename: The bare filename to normalise.

    Returns:
        The new filename, or None when no pattern matches.
    """
    cleaned_filename = clean_filename_pre(filename)

    # Titles only need strip(): clean_filename_pre has already converted
    # underscores to spaces.

    # ==========================================================
    # 1) VOLUME + ISSUE pattern (e.g. "Comic Name v3 051 (2018).ext")
    # ==========================================================
    vol_issue_match = VOLUME_ISSUE_PATTERN.match(cleaned_filename)
    if vol_issue_match:
        raw_title, volume_part, issue_part, middle, extension = vol_issue_match.groups()
        # The pattern captures the issue as 1-3 digits only, so it can
        # never start with "v"; it is always zero-padded to three digits.
        final_issue = f"{int(issue_part):03d}"
        return (
            f"{raw_title.strip()} {volume_part} {final_issue}"
            f"{_year_suffix(middle)}{extension}"
        )

    # ==========================================================
    # 2) Single ISSUE pattern (no separate "volume" token)
    #    e.g. "Comic Name 051 (2018).cbz" or "Comic Name v3 (2018).cbz"
    # ==========================================================
    issue_match = ISSUE_PATTERN.match(cleaned_filename)
    if issue_match:
        raw_title, issue_part, middle, extension = issue_match.groups()
        if issue_part.lower().startswith('v'):
            final_issue = issue_part  # volume token such as "v01": keep as-is
        else:
            final_issue = f"{int(issue_part):03d}"  # e.g. 1 -> 001
        return f"{raw_title.strip()} {final_issue}{_year_suffix(middle)}{extension}"

    # ==========================================================
    # 3) ISSUE number AFTER YEAR pattern
    #    e.g. "Spider-Man 2099 (1992) #44 (digital) (Colecionadores.GO).cbz"
    # ==========================================================
    issue_after_year_match = ISSUE_AFTER_YEAR_PATTERN.match(cleaned_filename)
    if issue_after_year_match:
        raw_title, year, issue, _, extension = issue_after_year_match.groups()
        # The issue keeps its "#" prefix and is not zero-padded.
        return f"{raw_title.strip()} {issue} ({year}){extension}"

    # ==========================================================
    # 4) Fallback: Title (YYYY) anything .ext
    #    e.g. "Comic Name (2018) some extra.cbz" -> "Comic Name (2018).cbz"
    # ==========================================================
    fallback_match = FALLBACK_PATTERN.match(cleaned_filename)
    if fallback_match:
        raw_title, found_year, _, extension = fallback_match.groups()
        return f"{raw_title.strip()} ({found_year}){extension}"

    # ==========================================================
    # 5) No match => return None
    # ==========================================================
    return None
def rename_files(directory):
    """
    Walk `directory` (including subdirectories) and rename every file for
    which get_renamed_filename() returns a different name.

    Hidden directories (per is_hidden) are not descended into and hidden
    files are skipped. A file is also skipped, with a log entry, when its
    target name already exists: os.rename() would silently replace the
    existing file on POSIX (and raise on Windows), which loses data when
    two files clean to the same name.

    Args:
        directory: Root directory to process.

    Raises:
        OSError: Propagated from os.rename() if a rename fails.
    """
    app_logger.info("********************// Rename Directory Files //********************")
    for subdir, dirs, files in os.walk(directory):
        # Prune in place so os.walk does not descend into hidden directories.
        dirs[:] = [d for d in dirs if not is_hidden(os.path.join(subdir, d))]
        for filename in files:
            old_path = os.path.join(subdir, filename)
            if is_hidden(old_path):
                app_logger.info(f"Skipping hidden file: {old_path}")
                continue
            new_name = get_renamed_filename(filename)
            if not new_name or new_name == filename:
                continue
            new_path = os.path.join(subdir, new_name)
            # Never clobber an existing entry (lexists also catches
            # dangling symlinks, which os.rename would replace as well).
            if os.path.lexists(new_path):
                app_logger.info(
                    f"Skipping rename, target already exists:\n {old_path}\n --> {new_path}\n"
                )
                continue
            app_logger.info(f"Renaming:\n {old_path}\n --> {new_path}\n")
            os.rename(old_path, new_path)
def rename_file(file_path):
    """
    Rename a single file using the logic in get_renamed_filename().

    The file is left untouched when it is hidden (per is_hidden), when no
    pattern matches, when the name is already clean, or when the target
    name already exists. The last check prevents os.rename() from silently
    replacing another file on POSIX (it would raise on Windows).

    Args:
        file_path: Path of the file to rename.

    Returns:
        The new path if the file was renamed, otherwise None.

    Raises:
        OSError: Propagated from os.rename() if the rename fails.
    """
    app_logger.info("********************// Rename Single File //********************")
    if is_hidden(file_path):
        app_logger.info(f"Skipping hidden file: {file_path}")
        return None

    directory, filename = os.path.split(file_path)
    new_name = get_renamed_filename(filename)
    if not new_name or new_name == filename:
        app_logger.info("No renaming pattern matched or no change needed.")
        return None

    new_path = os.path.join(directory, new_name)
    # Never clobber an existing entry (lexists also catches dangling
    # symlinks, which os.rename would replace as well).
    if os.path.lexists(new_path):
        app_logger.info(
            f"Skipping rename, target already exists:\n {file_path}\n --> {new_path}\n"
        )
        return None

    app_logger.info(f"Renaming:\n {file_path}\n --> {new_path}\n")
    os.rename(file_path, new_path)
    return new_path
# Script entry point: the first command-line argument is the directory
# whose files are renamed recursively.
if __name__ == "__main__":
    if len(sys.argv) >= 2:
        directory = sys.argv[1]
        rename_files(directory)
    else:
        app_logger.info("No directory provided!")