Skip to content

Commit

Permalink
Add support for the mac-roman encoding (#45)
Browse files Browse the repository at this point in the history
* Add support for mac-roman encoding
  • Loading branch information
fjl authored Jul 28, 2021
1 parent 911a3e2 commit 41af5dc
Show file tree
Hide file tree
Showing 13 changed files with 84 additions and 6 deletions.
20 changes: 16 additions & 4 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ <h4><a name="external-formats" class=none>External formats</a></h4>
<li><a href="http://en.wikipedia.org/wiki/UTF-32">UTF-32</a> (denoted by the keyword <code>:UTF-32</code>),
<li>all <a href="http://czyborra.com/charsets/iso8859.html">ISO 8859</a> character sets (denoted by keywords like <code>:ISO-8859-15</code>),
<li><a href="http://en.wikipedia.org/wiki/KOI8-R">KOI8-R</a> (denoted by the keyword <code>:KOI8-R</code>),
<li><a href="https://en.wikipedia.org/wiki/Mac_OS_Roman">Mac OS Roman</a> (denoted by the keyword <code>:MAC-ROMAN</code>),
<li>a couple
of <a href="http://czyborra.com/charsets/codepages.html">Windows code
pages</a> (denoted by the keyword <code>:CODE-PAGE</code> and an
Expand Down Expand Up @@ -358,6 +359,9 @@ <h4><a name="external-formats" class=none>External formats</a></h4>
<tr><td rowspan=2 valign=top><code>:CODE-PAGE</code></td><td><code>:CODEPAGE</code></td></tr>
<tr><td><code>WIN32:CODE-PAGE<br>(only on <a href="http://www.lispworks.com/products/lww.html">LWW</a>)</code></td></tr>
<tr><td><code>:KOI8-R</code></td><td><code>:KOI8R</code></td></tr>
<tr><td rowspan=3 valign=top><code>:MAC-ROMAN</code></td><td><code>:MAC</code></td></tr>
<tr><td><code>:MACINTOSH</code></td></tr>
<tr><td><code>:MACOS-ROMAN</code></td></tr>
<tr><td><code>:US-ASCII</code></td><td><code>:ASCII</code></td></tr>
</table>
<p>
Expand Down Expand Up @@ -450,10 +454,18 @@ <h4><a name="external-formats" class=none>External formats</a></h4>
symbol, <code><i>eol-style</i></code> is one of the
keywords <code>:CR</code>, <code>:LF</code>, or <code>:CRLF</code>,
and <code><i>little-endian</i></code> is
a <a
href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_g.htm#generalized_boolean">generalized
boolean</a>. The default value for <code><i>eol-style</i></code> is the value of <a href="#*default-eol-style*"><code>*DEFAULT-EOL-STYLE*</code></a> except for Windows code pages where it is <code>:CRLF</code>. The default value
for <code><i>little-endian</i></code> is the value of <a href="#*default-little-endian*"><code>*DEFAULT-LITTLE-ENDIAN*</code></a> - this value is ignored unless <code><i>name</i></code> denotes one of UTF-16 or UTF-32.
a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/26_glo_g.htm#generalized_boolean">generalized
boolean</a>.

<p>
The default value for <code><i>eol-style</i></code> is the value of <a href="#*default-eol-style*"><code>*DEFAULT-EOL-STYLE*</code></a>,
with two exceptions: for Windows code pages the default is <code>:CRLF</code>, and for <code>:MAC-ROMAN</code> it is <code>:CR</code>.

<p>
The default value for <code><i>little-endian</i></code> is the value of <a href="#*default-little-endian*"><code>*DEFAULT-LITTLE-ENDIAN*</code></a>
- this value is ignored unless <code><i>name</i></code> denotes one of UTF-16 or UTF-32.

<p>
<code><i>id</i></code> must be an integer denoting a Windows code page
known by FLEXI-STREAMS if <code><i>name</i></code>
is <code>:CODE-PAGE</code> or <code>WIN32:CODE-PAGE</code>, otherwise
Expand Down
12 changes: 10 additions & 2 deletions external-format.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ external format."
(values +ascii-hash+ +ascii-table+))
((koi8-r-name-p name)
(values +koi8-r-hash+ +koi8-r-table+))
((mac-roman-name-p name)
(values +mac-roman-hash+ +mac-roman-table+))
((iso-8859-name-p name)
(values (cdr (assoc name +iso-8859-hashes+ :test #'eq))
(cdr (assoc name +iso-8859-tables+ :test #'eq))))
Expand All @@ -266,6 +268,7 @@ external format."
(:cr 'flexi-cr-latin-1-format)
(:crlf 'flexi-crlf-latin-1-format)))
((or (koi8-r-name-p real-name)
(mac-roman-name-p real-name)
(iso-8859-name-p real-name)
(code-page-name-p real-name))
(ecase eol-style
Expand Down Expand Up @@ -311,9 +314,12 @@ EXTERNAL-FORMAT."
(let* ((real-name (normalize-external-format-name name))
(initargs
(cond ((or (iso-8859-name-p real-name)
(koi8-r-name-p real-name)
(koi8-r-name-p real-name)
(ascii-name-p real-name))
(list :eol-style (or eol-style *default-eol-style*)))
((mac-roman-name-p real-name)
;; Default EOL style for mac-roman is :CR.
(list :eol-style (or eol-style :cr)))
((code-page-name-p real-name)
(list :id (or (known-code-page-id-p id)
(error 'external-format-error
Expand Down Expand Up @@ -371,7 +377,8 @@ object."
;; for non-8-bit encodings the endianness must be the same
(or code-page-name-p
(ascii-name-p name1)
(koi8-r-name-p name1)
(koi8-r-name-p name1)
(mac-roman-name-p name1)
(iso-8859-name-p name1)
(eq name1 :utf-8)
(eq (not (external-format-little-endian ef1))
Expand All @@ -390,6 +397,7 @@ back to MAKE-EXTERNAL-FORMAT to create an equivalent object."
(eol-style (external-format-eol-style external-format)))
(cond ((or (ascii-name-p name)
(koi8-r-name-p name)
(mac-roman-name-p name)
(iso-8859-name-p name)
(eq name :utf-8))
(list name :eol-style eol-style))
Expand Down
1 change: 1 addition & 0 deletions flexi-streams.asd
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
(:file "mapping")
(:file "ascii")
(:file "koi8-r")
(:file "mac")
(:file "iso-8859")
(:file "enc-cn-tbl")
(:file "code-pages")
Expand Down
44 changes: 44 additions & 0 deletions mac.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-

;;; Copyright (c) 2021, Felix Lange. All rights reserved.

;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:

;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.

;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.

;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(in-package :flexi-streams)

;; https://unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT
;;
;; The list passed to MAKE-DECODING-TABLE below has 256 entries: the
;; integers 0-127 in ascending order, followed by 128 character codes
;; for the upper half of the encoding.  The position of an entry in the
;; list is presumably the MAC-ROMAN octet it decodes - confirm against
;; the definition of MAKE-DECODING-TABLE.
;;
;; NOTE(review): the entry 63743 is #xF8FF, which lies in a Unicode
;; private use area; consult the mapping file referenced above for how
;; Apple assigns it.
(defconstant +mac-roman-table+
(make-decoding-table
'(;; first 128 values match ASCII
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
;; extended characters > 127
196 197 199 201 209 214 220 225 224 226 228 227 229 231 233 232 234 235 237 236 238 239 241 243 242 244 246 245 250 249 251 252 8224
176 162 163 167 8226 182 223 174 169 8482 180 168 8800 198 216 8734 177 8804 8805 165 181 8706 8721 8719 960 8747 170 186 937 230 248 191 161
172 8730 402 8776 8710 171 187 8230 160 192 195 213 338 339 8211 8212 8220 8221 8216 8217 247 9674 255 376 8260 8364 8249 8250 64257 64258 8225 183 8218
8222 8240 194 202 193 203 200 205 206 207 204 211 212 63743 210 218 219 217 305 710 732 175 728 729 730 184 733 731 711))
"An array enumerating the character codes for the MAC-ROMAN encoding.")
7 changes: 7 additions & 0 deletions specials.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ suitable functional object when this function is called.")
(:ucs-4 . :utf-32)
(:ascii . :us-ascii)
(:koi8r . :koi8-r)
(:mac . :mac-roman)
(:macintosh . :mac-roman)
(:macos-roman . :mac-roman)
(:latin-1 . :iso-8859-1)
(:latin1 . :iso-8859-1)
(:latin-2 . :iso-8859-2)
Expand Down Expand Up @@ -176,6 +179,10 @@ corresponding octets.")
"A hash table which maps KOI8-R character codes to the
corresponding octets.")

;; The initial value form reads +MAC-ROMAN-TABLE+, so the file defining
;; that table (mac.lisp) has to be loaded before this form is evaluated.
(defconstant +mac-roman-hash+ (invert-table +mac-roman-table+)
"A hash table which maps MAC-ROMAN character codes to the
corresponding octets.")

;; NOTE(review): the unit is not stated here; presumably the size is
;; counted in buffer elements (octets) - confirm at the use sites.
(defconstant +buffer-size+ 8192
"Default size for buffers used for internal purposes.")

Expand Down
Binary file added test/mac_chars_mac_cr.txt
Binary file not shown.
Binary file added test/mac_chars_mac_crlf.txt
Binary file not shown.
Binary file added test/mac_chars_mac_lf.txt
Binary file not shown.
Binary file added test/mac_chars_utf8_cr.txt
Binary file not shown.
Binary file added test/mac_chars_utf8_crlf.txt
Binary file not shown.
Binary file added test/mac_chars_utf8_lf.txt
Binary file not shown.
2 changes: 2 additions & 0 deletions test/test.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ defined.")
(defvar *test-files*
'(("kafka" (:utf8 :latin1 :cp1252))
("tilton" (:utf8 :ascii))
("mac_chars" (:utf8 :mac))
("hebrew" (:utf8 :latin8))
("russian" (:utf8 :koi8r))
("xjt" (:gbk))
Expand All @@ -135,6 +136,7 @@ endianness."
(:latin8 '(:hebrew))
(:cp1252 '(:code-page :id 1252))
(:koi8r '(:koi8-r))
(:mac '(:mac-roman))
(:utf8 '(:utf-8))
(:gbk '(:gbk))
(:ucs2 '(:utf-16))
Expand Down
4 changes: 4 additions & 0 deletions util.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ signals an error otherwise."
"Checks whether NAME is the keyword :KOI8-R."
(eq name :koi8-r))

(defun mac-roman-name-p (name)
  "Returns true if and only if NAME is the keyword :MAC-ROMAN.
Only that exact keyword is accepted; other spellings such as :MAC
yield NIL."
  (eq :mac-roman name))

(defun code-page-name-p (name)
  "Returns true if and only if NAME is the keyword :CODE-PAGE.
Any other object, including other keywords, yields NIL."
  (eq :code-page name))
Expand Down

0 comments on commit 41af5dc

Please sign in to comment.