-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert-encoding.sh
147 lines (134 loc) · 3.58 KB
/
convert-encoding.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
#######################################
# Prints the command-line help text and terminates the script.
# Arguments: none
# Outputs:   the help text on stdout
# Returns:   does not return; exits the current shell with status 0
#######################################
function usage() {
  echo "Usage: ./convert-encoding.sh [-p] [-d] [-s] [-e]"
  echo ""
  echo "Converts file encoding/charset of multiple files inside the current"
  echo "directory matching a pattern."
  echo ""
  echo "Uses 'file -i' and 'iconv' under the hood"
  echo ""
  # The pattern is quoted in both examples: an unquoted glob would be expanded
  # by the calling shell, and only the first matching name would reach -p.
  echo "Example usage for automated encoding detection"
  echo "This converts all *.java files from unknown charset to UTF-8"
  echo "./convert-encoding.sh -p '*.java' -d -e UTF-8"
  echo ""
  echo "Example usage for defined source encoding/charset"
  echo "This converts all *.xml files from ISO-8859-1 to UTF-8"
  echo "./convert-encoding.sh -p '*.xml' -s ISO-8859-1 -e UTF-8"
  echo ""
  echo "Arguments:"
  echo " -p, --pattern File pattern"
  echo " -d, --detect Auto detect source encoding/charset"
  echo " You must either provide this argument or -s"
  echo " -s, --source-encoding Encoding/charset of source files"
  echo " -e, --encoding Target encoding/charset"
  echo " -v, --verbose Prints filenames, source and target encoding"
  echo " -t, --dry-run No conversion happens. Always verbose"
  echo " -h, --help Shows this help output"
  exit 0
}
# Get arguments

#######################################
# Aborts the script when an option that needs a value is the last argument.
# Arguments: $1 - the option as typed by the user
#            $2 - number of arguments still to be parsed (including $1)
# Outputs:   an error message on stderr when the value is missing
# Returns:   0 when a value follows; otherwise exits with status 1
#######################################
function require_option_value() {
  if [[ "$2" -lt 2 ]]; then
    echo "Missing value for argument $1" >&2
    exit 1
  fi
}

#######################################
# Parses the command line into the script's global option variables.
# Globals (written): help, file_pattern, detect, defined_source_encoding,
#                    encoding, verbose, dry_run, POSITIONAL
# Arguments: the command-line arguments of the script
# Outputs:   nothing on success
# Returns:   0 on success; exits with status 1 when an option value is missing
#######################################
function parse_arguments() {
  local key
  POSITIONAL=()
  while [[ $# -gt 0 ]]; do
    key="$1"
    case "$key" in
      -h|--help)
        help=true
        shift # past argument
        ;;
      -p|--pattern)
        require_option_value "$key" "$#"
        file_pattern="$2"
        shift 2 # past argument and value
        ;;
      -d|--detect)
        detect=true
        shift # past argument
        ;;
      -s|--source-encoding)
        require_option_value "$key" "$#"
        defined_source_encoding="$2"
        shift 2 # past argument and value
        ;;
      -e|--encoding)
        require_option_value "$key" "$#"
        encoding="$2"
        shift 2 # past argument and value
        ;;
      -v|--verbose)
        verbose=true
        shift # past argument
        ;;
      -t|--dry-run)
        dry_run=true
        shift # past argument
        ;;
      *) # unknown option
        POSITIONAL+=("$1") # save it in an array for later
        shift # past argument
        ;;
    esac
  done
}

parse_arguments "$@"
# "set --" inside a function would only change the function's own parameters,
# so the positional parameters are restored here at top level.
set -- "${POSITIONAL[@]}" # restore positional parameters
# Check help: an explicit help request prints the usage text and ends the script.
if [[ "${help:-}" == "true" ]]; then
  usage
fi

# Validate arguments. Every problem is reported (on stderr) before giving up,
# so the user sees all missing options at once.
valid_arguments=true
# Options that carry a value count as missing when unset OR empty: an empty
# pattern or encoding can never produce a useful conversion.
if [[ -z "${file_pattern:-}" ]]; then
  valid_arguments=false
  echo "Missing argument -p, --pattern" >&2
fi
if [[ -z "${encoding:-}" ]]; then
  valid_arguments=false
  echo "Missing argument -e, --encoding" >&2
fi
# detect is a plain flag, so only its presence matters.
if [[ -z "${detect+x}" && -z "${defined_source_encoding:-}" ]]; then
  valid_arguments=false
  echo "Either argument -d, --detect or -s, --source-encoding must be set" >&2
fi
if [[ "${valid_arguments}" == "false" ]]; then
  echo "" >&2
  # usage terminates the shell it runs in, so it is run in a subshell; that
  # lets this script report the failure with a non-zero exit status itself.
  ( usage ) >&2
  exit 1
fi

if [[ "${dry_run:-}" == "true" ]]; then
  echo "Dry-Run..."
fi
# Convert encoding

#######################################
# Converts one file in place from one charset to another.
# The converted text is first written to a temporary file; the original is
# only overwritten once iconv has succeeded, so a failed conversion leaves
# the file unchanged.
# Arguments: $1 - path of the file to convert
#            $2 - source encoding/charset
#            $3 - target encoding/charset
# Returns:   0 on success, 1 when the temporary file cannot be created or
#            the conversion/overwrite fails
#######################################
function convert_file() {
  local file="$1"
  local source_encoding="$2"
  local target_encoding="$3"
  local converted

  converted=$(mktemp) || return 1
  # Writing back with "cat >" keeps the original file's permissions.
  if iconv -f "$source_encoding" -t "$target_encoding" < "$file" > "$converted" \
    && cat "$converted" > "$file"; then
    rm -f -- "$converted"
    return 0
  fi
  rm -f -- "$converted"
  return 1
}

#######################################
# Converts every regular file below the current directory whose name matches
# the file pattern to the target encoding.
# Globals (read): file_pattern, defined_source_encoding, encoding, verbose,
#                 dry_run
# Outputs:   one line per file on stdout when verbose or dry-run is active;
#            one line per failed conversion on stderr
# Returns:   0 when every conversion succeeded, 1 otherwise
#######################################
function convert_matching_files() {
  local file
  local source_encoding
  local failures=0
  local -a files=()

  # Collect the NUL-delimited find results first: this keeps file names with
  # whitespace or glob characters intact.
  while IFS= read -r -d '' file; do
    files+=("$file")
  done < <(find . -type f -name "${file_pattern:-}" -print0)

  for file in "${files[@]}"; do
    # Check if a source encoding is given
    if [[ -z "${defined_source_encoding+x}" ]]; then
      # If not, determine encoding. --mime-encoding prints only the charset
      # and -b omits the file name, so no output parsing is needed.
      source_encoding=$(file -b --mime-encoding -- "$file")
    else
      source_encoding=${defined_source_encoding}
    fi

    # If verbose or dry run, print source and target encoding
    if [[ "${verbose:-}" == "true" || "${dry_run:-}" == "true" ]]; then
      echo "Source encoding: ${source_encoding}; target encoding: ${encoding}; ${file}"
    fi

    # Don't execute if it's a dry-run
    if [[ -z "${dry_run+x}" ]]; then
      if ! convert_file "$file" "$source_encoding" "${encoding}"; then
        echo "Conversion failed, file left unchanged: ${file}" >&2
        failures=$((failures + 1))
      fi
    fi
  done

  [[ "$failures" -eq 0 ]]
}

convert_matching_files || exit 1