-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetTitles.py
117 lines (111 loc) · 3.01 KB
/
getTitles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import config
from os import listdir
def get_titles_from_file(titles_file=""):
    """
    SUMMARY
    -------
    Read in a list of file names from a plain text file with one file name
    per line
    -------
    INPUT: optional path to titles file, defaults to config.MOVIE_TITLES_FILE
    OUTPUT: list of strings stripped of white space (a blank line yields an
            empty string), or None if the file could not be opened, in
            which case a message is printed
    """
    if not titles_file:
        titles_file = config.MOVIE_TITLES_FILE
    # Only the open() call is guarded, so errors raised while reading still
    # propagate to the caller instead of being reported as a missing file.
    try:
        f = open(titles_file, 'r')
    except IOError:
        print("No such file as %s" % titles_file)
        return None
    # The context manager closes the handle even if reading fails part way.
    with f:
        return [line.strip() for line in f]
def get_titles_from_dir(dir_path=""):
    """
    SUMMARY
    -------
    List the entries of a directory in which every file or subdirectory
    stands for one film
    -------
    INPUT: optional path to directory, defaults to config.MOVIE_TITLES_FILE
    OUTPUT: list of entry names stripped of white space, or None if the
            directory could not be listed, in which case a message is
            printed
    """
    # Fall back to the configured path when the caller passes nothing.
    target = dir_path if dir_path else config.MOVIE_TITLES_FILE
    try:
        entries = listdir(target)
    except OSError:
        print("No such directory as %s" % target)
        return None
    names = []
    for entry in entries:
        names.append(entry.strip())
    return names
def parse_titles(title_list):
    """
    SUMMARY
    -------
    Split every raw title into its title, year, and file extension, and
    print a warning for each entry whose year could not be found
    -------
    INPUT: list of raw title strings
    OUTPUT: list of 3-tuples (title,year,file extension); year is 0 and
            extension is "" when the respective part is missing
    """
    results = []
    for entry in title_list:
        # Drop the extension first so the year is looked for in the
        # remaining text only.
        stem, extension = parse_extension(entry)
        name, year = parse_year(stem)
        name = name.strip()
        results.append((name, year, extension))
        if year:
            continue
        print("WARNING:File %s does not appear to have a year" % name)
    return results
def parse_extension(title):
    """
    SUMMARY
    -------
    Separate a trailing file extension, if there is one, from the title
    -------
    INPUT: raw title
    OUTPUT: 2-tuple (title without extension, parsed extension)
            ie ("This movie (1999)","mp4")
            The title is returned unchanged with "" as the extension when
            it has no period, or when the text after the last period is
            not found in config.MOVIE_FILE_EXTENSIONS
    """
    # Split on the last period; the separator is empty when there is none.
    stem, dot, suffix = title.rpartition('.')
    if not dot:
        return (title, "")
    # A period alone is not enough: the suffix must be a known extension.
    if suffix not in config.MOVIE_FILE_EXTENSIONS:
        return (title, "")
    return (stem, suffix)
def parse_year(title):
    """
    SUMMARY
    -------
    Parse the file year, if it exists, from the title and return title
    without the year
    -------
    INPUT: raw title
    OUTPUT: 2-tuple (title without year, parsed year as an int)
            ie ("This movie",1999)
            The title is returned unchanged with 0 as the year when the
            text between the last "(" and the last ")" is not exactly four
            digits. When a year is found, everything from that "(" onward
            is dropped from the title.
    """
    left_par_ind = title.rfind('(')
    right_par_ind = title.rfind(')')
    if left_par_ind == -1 or right_par_ind == -1:
        return (title, 0)
    # If the last ")" comes before the last "(" the slice is empty and is
    # rejected by the length check below.
    year = title[left_par_ind + 1:right_par_ind].strip()
    # int() alone would also accept forms such as "+199" or "1_99", so
    # require digits only before converting.
    if len(year) != 4 or not year.isdigit():
        return (title, 0)
    try:
        year = int(year)
    except ValueError:
        # Some characters count as digits for isdigit() but cannot be
        # converted by int() (e.g. superscript digits).
        return (title, 0)
    return (title[:left_par_ind].strip(), year)
if __name__ == "__main__":
    # Script entry point: read the configured titles file and parse it.
    titles = get_titles_from_file()
    # get_titles_from_file returns None (after printing a message) when the
    # file cannot be opened; skip parsing rather than fail on None.
    if titles is not None:
        parsed = parse_titles(titles)
        # Uncomment to inspect the parsed tuples:
        # for item in parsed:
        #     print(item)