Java : Charset (文字コード) - API使用例
Charset (Java SE 22 & JDK 22) の使い方まとめです。
ほとんどのメソッドにサンプルコードがあります。
API仕様書のおともにどうぞ。
概要
Charset は、文字コード(文字セット) を表すクラスです。
例えば、文字列をバイナリデータに変換(エンコード)するときに、どの文字コードを使うか指定できます。
final var utf8 = StandardCharsets.UTF_8;
System.out.println(utf8); // UTF-8
final var sjis = Charset.forName("Shift_JIS");
System.out.println(sjis); // Shift_JIS
final var text = "abc♪";
System.out.println(text); // abc♪
final var encodedUtf8 = text.getBytes(utf8);
System.out.println(Arrays.toString(encodedUtf8)); // [97, 98, 99, -30, -103, -86]
final var encodedSjis = text.getBytes(sjis);
System.out.println(Arrays.toString(encodedSjis)); // [97, 98, 99, -127, -12]
デフォルトの文字コードは Charset.defaultCharset で取得できます。Java 18 以降では、起動時に file.encoding システムプロパティで変更しない限り、OS に関係なく UTF-8 になります。
final var charset = Charset.defaultCharset();
System.out.println(charset); // UTF-8
コンストラクタ
Charset (String canonicalName, String[] aliases)
このコンストラクタは protected です。
独自にサブクラスを作ることは少ないと思いますので、コード例は割愛します。
メソッド
final Set<String> aliases ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.aliases()); // [unicode-1-1-utf-8, UTF8]
static SortedMap<String,Charset> availableCharsets ()
final var charsets = Charset.availableCharsets();
for (final var charset : charsets.values()) {
System.out.println(charset);
}
// 結果
// ↓
//Big5
//Big5-HKSCS
//CESU-8
//EUC-JP
//EUC-KR
//GB18030
//GB2312
//GBK
//...
//Shift_JIS
//...
//UTF-16
//UTF-16BE
//UTF-16LE
//UTF-32
//UTF-32BE
//UTF-32LE
//UTF-8
//...
boolean canEncode ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.canEncode()); // true
final var encoded = charset.encode("abc♪");
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final int compareTo (Charset that)
final var charset1 = StandardCharsets.UTF_8;
final var charset2 = StandardCharsets.ISO_8859_1;
final var charset3 = StandardCharsets.US_ASCII;
System.out.println(charset1.compareTo(charset1)); // 0
System.out.println(charset1.compareTo(charset2)); // 12
System.out.println(charset1.compareTo(charset3)); // 1
System.out.println(charset2.compareTo(charset3)); // -12
abstract boolean contains (Charset cs)
final var utf8 = StandardCharsets.UTF_8;
final var ascii = StandardCharsets.US_ASCII;
System.out.println(utf8.contains(ascii)); // true
System.out.println(ascii.contains(utf8)); // false
final CharBuffer decode (ByteBuffer bb)
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
final var encoded = charset.encode("abc♪");
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final var decoded = charset.decode(encoded);
System.out.println(decoded); // abc♪
static Charset defaultCharset ()
final var charset = Charset.defaultCharset();
System.out.println(charset); // UTF-8
String displayName ()
final var utf8 = StandardCharsets.UTF_8;
System.out.println(utf8.displayName()); // UTF-8
final var ascii = StandardCharsets.US_ASCII;
System.out.println(ascii.displayName()); // US-ASCII
String displayName (Locale locale)
System.out.println(Locale.getDefault().toLanguageTag()); // ja-JP
final var charset = StandardCharsets.UTF_8;
System.out.println(charset.displayName()); // UTF-8
System.out.println(charset.displayName(Locale.US)); // UTF-8
final ByteBuffer encode (String str)
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
final var encoded = charset.encode("abc♪");
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final var decoded = charset.decode(encoded);
System.out.println(decoded); // abc♪
final ByteBuffer encode (CharBuffer cb)
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
final var cb = CharBuffer.wrap("abc♪");
final var encoded = charset.encode(cb);
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final var decoded = charset.decode(encoded);
System.out.println(decoded); // abc♪
final boolean equals (Object ob)
final var charset1 = StandardCharsets.UTF_8;
final var charset2 = StandardCharsets.US_ASCII;
System.out.println(charset1.equals(charset1)); // true
System.out.println(charset1.equals(charset2)); // false
static Charset forName (String charsetName)
final var utf8 = Charset.forName("UTF-8");
System.out.println(utf8); // UTF-8
System.out.println(utf8.equals(StandardCharsets.UTF_8)); // true
final var sjis = Charset.forName("Shift_JIS");
System.out.println(sjis); // Shift_JIS
static Charset forName (String charsetName, Charset fallback)
final var charset = Charset.forName("UTF-8", StandardCharsets.US_ASCII);
System.out.println(charset); // UTF-8
final var charset = Charset.forName("xxx", StandardCharsets.US_ASCII);
System.out.println(charset); // US-ASCII
final int hashCode ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset.hashCode()); // 81070450
final var charset = StandardCharsets.UTF_16;
System.out.println(charset.hashCode()); // -1781783509
final var charset = StandardCharsets.US_ASCII;
System.out.println(charset.hashCode()); // -185735358
final boolean isRegistered ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.isRegistered()); // true
final var charset = Charset.forName("x-windows-50220");
System.out.println(charset); // x-windows-50220
System.out.println(charset.isRegistered()); // false
static boolean isSupported (String charsetName)
System.out.println(Charset.isSupported("UTF-8")); // true
System.out.println(Charset.isSupported("US-ASCII")); // true
System.out.println(Charset.isSupported("Shift_JIS")); // true
System.out.println(Charset.isSupported("xxx")); // false
final String name ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset.name()); // UTF-8
final var charset = StandardCharsets.US_ASCII;
System.out.println(charset.name()); // US-ASCII
final var charset = Charset.forName("Shift_JIS");
System.out.println(charset.name()); // Shift_JIS
abstract CharsetDecoder newDecoder ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
final var encoder = charset.newEncoder();
final var decoder = charset.newDecoder();
final var cb = CharBuffer.wrap("abc♪");
final var encoded = encoder.encode(cb);
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final var decoded = decoder.decode(encoded);
System.out.println(decoded); // abc♪
abstract CharsetEncoder newEncoder ()
final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
final var encoder = charset.newEncoder();
final var decoder = charset.newDecoder();
final var cb = CharBuffer.wrap("abc♪");
final var encoded = encoder.encode(cb);
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
final var array = encoded.array();
System.out.println(Arrays.toString(array)); // [97, 98, 99, -30, -103, -86, 0, 0, 0]
final var decoded = decoder.decode(encoded);
System.out.println(decoded); // abc♪
final String toString ()
final var charset = StandardCharsets.UTF_8;
final var ret = charset.toString();
System.out.println(ret); // UTF-8
final var charset = StandardCharsets.US_ASCII;
final var ret = charset.toString();
System.out.println(ret); // US-ASCII
final var charset = Charset.forName("Shift_JIS");
final var ret = charset.toString();
System.out.println(ret); // Shift_JIS