001 /* CharsetDecoder.java -- 002 Copyright (C) 2002 Free Software Foundation, Inc. 003 004 This file is part of GNU Classpath. 005 006 GNU Classpath is free software; you can redistribute it and/or modify 007 it under the terms of the GNU General Public License as published by 008 the Free Software Foundation; either version 2, or (at your option) 009 any later version. 010 011 GNU Classpath is distributed in the hope that it will be useful, but 012 WITHOUT ANY WARRANTY; without even the implied warranty of 013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 General Public License for more details. 015 016 You should have received a copy of the GNU General Public License 017 along with GNU Classpath; see the file COPYING. If not, write to the 018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 019 02110-1301 USA. 020 021 Linking this library statically or dynamically with other modules is 022 making a combined work based on this library. Thus, the terms and 023 conditions of the GNU General Public License cover the whole 024 combination. 025 026 As a special exception, the copyright holders of this library give you 027 permission to link this library with independent modules to produce an 028 executable, regardless of the license terms of these independent 029 modules, and to copy and distribute the resulting executable under 030 terms of your choice, provided that you also meet, for each linked 031 independent module, the terms and conditions of the license of that 032 module. An independent module is a module which is not derived from 033 or based on this library. If you modify this library, you may extend 034 this exception to your version of the library, but you are not 035 obligated to do so. If you do not wish to do so, delete this 036 exception statement from your version. */ 037 038 package java.nio.charset; 039 040 import java.nio.ByteBuffer; 041 import java.nio.CharBuffer; 042 043 /** 044 * @author Jesse Rosenstock 045 * @since 1.4 046 */ 047 public abstract class CharsetDecoder 048 { 049 private static final int STATE_RESET = 0; 050 private static final int STATE_CODING = 1; 051 private static final int STATE_END = 2; 052 private static final int STATE_FLUSHED = 3; 053 054 private static final String DEFAULT_REPLACEMENT = "\uFFFD"; 055 056 private final Charset charset; 057 private final float averageCharsPerByte; 058 private final float maxCharsPerByte; 059 private String replacement; 060 061 private int state = STATE_RESET; 062 063 private CodingErrorAction malformedInputAction 064 = CodingErrorAction.REPORT; 065 private CodingErrorAction unmappableCharacterAction 066 = CodingErrorAction.REPORT; 067 068 private CharsetDecoder (Charset cs, float averageCharsPerByte, 069 float maxCharsPerByte, String replacement) 070 { 071 if (averageCharsPerByte <= 0.0f) 072 throw new IllegalArgumentException ("Non-positive averageCharsPerByte"); 073 if (maxCharsPerByte <= 0.0f) 074 throw new IllegalArgumentException ("Non-positive maxCharsPerByte"); 075 076 this.charset = cs; 077 this.averageCharsPerByte 078 = averageCharsPerByte; 079 this.maxCharsPerByte 080 = maxCharsPerByte; 081 this.replacement = replacement; 082 implReplaceWith (replacement); 083 } 084 085 protected CharsetDecoder (Charset cs, float averageCharsPerByte, 086 float maxCharsPerByte) 087 { 088 this (cs, averageCharsPerByte, maxCharsPerByte, DEFAULT_REPLACEMENT); 089 } 090 091 public final float averageCharsPerByte () 092 { 093 return averageCharsPerByte; 094 } 095 096 public final Charset charset () 097 { 098 return charset; 099 } 100 101 public final CharBuffer decode (ByteBuffer in) 102 throws CharacterCodingException 103 { 104 // XXX: Sun's Javadoc seems to contradict itself saying an 105 // IllegalStateException is thrown "if a decoding operation is already 106 // in progress" and also that "it resets this Decoder". 107 // Should we check to see that the state is reset, or should we 108 // call reset()? 109 if (state != STATE_RESET) 110 throw new IllegalStateException (); 111 112 // REVIEW: Using max instead of average may allocate a very large 113 // buffer. Maybe we should do something more efficient? 114 int remaining = in.remaining (); 115 int n = (int) (remaining * maxCharsPerByte ()); 116 CharBuffer out = CharBuffer.allocate (n); 117 118 if (remaining == 0) 119 { 120 state = STATE_FLUSHED; 121 return out; 122 } 123 124 CoderResult cr = decode (in, out, true); 125 if (cr.isError ()) 126 cr.throwException (); 127 128 cr = flush (out); 129 if (cr.isError ()) 130 cr.throwException (); 131 132 reset(); 133 out.flip (); 134 135 // Unfortunately, resizing the actual charbuffer array is required. 136 char[] resized = new char[out.remaining()]; 137 out.get(resized); 138 return CharBuffer.wrap(resized); 139 } 140 141 public final CoderResult decode (ByteBuffer in, CharBuffer out, 142 boolean endOfInput) 143 { 144 int newState = endOfInput ? STATE_END : STATE_CODING; 145 // XXX: Need to check for "previous step was an invocation [not] of 146 // this method with a value of true for the endOfInput parameter but 147 // a return value indicating an incomplete decoding operation" 148 // XXX: We will not check the previous return value, just 149 // that the previous call passed true for endOfInput 150 if (state != STATE_RESET && state != STATE_CODING 151 && !(endOfInput && state == STATE_END)) 152 throw new IllegalStateException (); 153 state = newState; 154 155 for (;;) 156 { 157 CoderResult cr; 158 try 159 { 160 cr = decodeLoop (in, out); 161 } 162 catch (RuntimeException e) 163 { 164 throw new CoderMalfunctionError (e); 165 } 166 167 if (cr.isOverflow ()) 168 return cr; 169 170 if (cr.isUnderflow ()) 171 { 172 if (endOfInput && in.hasRemaining ()) 173 cr = CoderResult.malformedForLength (in.remaining ()); 174 else 175 return cr; 176 } 177 178 CodingErrorAction action = cr.isMalformed () 179 ? malformedInputAction 180 : unmappableCharacterAction; 181 182 if (action == CodingErrorAction.REPORT) 183 return cr; 184 185 if (action == CodingErrorAction.REPLACE) 186 { 187 if (out.remaining () < replacement.length ()) 188 return CoderResult.OVERFLOW; 189 out.put (replacement); 190 } 191 192 in.position (in.position () + cr.length ()); 193 } 194 } 195 196 protected abstract CoderResult decodeLoop (ByteBuffer in, CharBuffer out); 197 198 public Charset detectedCharset () 199 { 200 throw new UnsupportedOperationException (); 201 } 202 203 public final CoderResult flush (CharBuffer out) 204 { 205 // It seems weird that you can flush after reset, but Sun's javadoc 206 // says an IllegalStateException is thrown "If the previous step of the 207 // current decoding operation was an invocation neither of the reset 208 // method nor ... of the three-argument decode method with a value of 209 // true for the endOfInput parameter." 210 // Further note that flush() only requires that there not be 211 // an IllegalStateException if the previous step was a call to 212 // decode with true as the last argument. It does not require 213 // that the call succeeded. decode() does require that it succeeded. 214 // XXX: test this to see if reality matches javadoc 215 if (state != STATE_RESET && state != STATE_END) 216 throw new IllegalStateException (); 217 218 state = STATE_FLUSHED; 219 return implFlush (out); 220 } 221 222 protected CoderResult implFlush (CharBuffer out) 223 { 224 return CoderResult.UNDERFLOW; 225 } 226 227 public final CharsetDecoder onMalformedInput (CodingErrorAction newAction) 228 { 229 if (newAction == null) 230 throw new IllegalArgumentException ("Null action"); 231 232 malformedInputAction = newAction; 233 implOnMalformedInput (newAction); 234 return this; 235 } 236 237 protected void implOnMalformedInput (CodingErrorAction newAction) 238 { 239 // default implementation does nothing 240 } 241 242 protected void implOnUnmappableCharacter (CodingErrorAction newAction) 243 { 244 // default implementation does nothing 245 } 246 247 protected void implReplaceWith (String newReplacement) 248 { 249 // default implementation does nothing 250 } 251 252 protected void implReset () 253 { 254 // default implementation does nothing 255 } 256 257 public boolean isAutoDetecting () 258 { 259 return false; 260 } 261 262 public boolean isCharsetDetected () 263 { 264 throw new UnsupportedOperationException (); 265 } 266 267 public CodingErrorAction malformedInputAction () 268 { 269 return malformedInputAction; 270 } 271 272 public final float maxCharsPerByte () 273 { 274 return maxCharsPerByte; 275 } 276 277 public final CharsetDecoder onUnmappableCharacter 278 (CodingErrorAction newAction) 279 { 280 if (newAction == null) 281 throw new IllegalArgumentException ("Null action"); 282 283 unmappableCharacterAction = newAction; 284 implOnUnmappableCharacter (newAction); 285 return this; 286 } 287 288 public final String replacement () 289 { 290 return replacement; 291 } 292 293 public final CharsetDecoder replaceWith (String newReplacement) 294 { 295 if (newReplacement == null) 296 throw new IllegalArgumentException ("Null replacement"); 297 if (newReplacement.length () == 0) 298 throw new IllegalArgumentException ("Empty replacement"); 299 // XXX: what about maxCharsPerByte? 300 301 this.replacement = newReplacement; 302 implReplaceWith (newReplacement); 303 return this; 304 } 305 306 public final CharsetDecoder reset () 307 { 308 state = STATE_RESET; 309 implReset (); 310 return this; 311 } 312 313 public CodingErrorAction unmappableCharacterAction () 314 { 315 return unmappableCharacterAction; 316 } 317 }