Java : Charset with Examples

Charset (Java SE 19 & JDK 19) API examples.
This page provides code examples for most Charset methods.


Summary

A named mapping between sequences of sixteen-bit Unicode code units and sequences of bytes. This class defines methods for creating decoders and encoders and for retrieving the various names associated with a charset.

Class diagram

final var utf8 = StandardCharsets.UTF_8;
System.out.println(utf8); // UTF-8

final var sjis = Charset.forName("Shift_JIS");
System.out.println(sjis); // Shift_JIS

final var text = "abc♪";
System.out.println(text); // abc♪

final var encodedUtf8 = text.getBytes(utf8);
System.out.println(Arrays.toString(encodedUtf8)); // [97, 98, 99, -30, -103, -86]

final var encodedSjis = text.getBytes(sjis);
System.out.println(Arrays.toString(encodedSjis)); // [97, 98, 99, -127, -12]
final var charset = Charset.defaultCharset();
System.out.println(charset); // UTF-8

Constructors

Charset (String canonicalName, String[] aliases)

Initializes a new charset with the given canonical name and alias set.

This constructor is protected. Creating a subclass of Charset is rarely necessary, so the code example is omitted.

Methods

final Set<String> aliases ()

Returns a set containing this charset's aliases.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.aliases()); // [unicode-1-1-utf-8, UTF8]

static SortedMap<String,Charset> availableCharsets ()

Constructs a sorted map from canonical charset names to charset objects.

final var charsets = Charset.availableCharsets();
for (final var charset : charsets.values()) {
    System.out.println(charset);
}

// Result
// ↓
//Big5
//Big5-HKSCS
//CESU-8
//EUC-JP
//EUC-KR
//GB18030
//GB2312
//GBK
//...
//UTF-16
//UTF-16BE
//UTF-16LE
//UTF-32
//UTF-32BE
//UTF-32LE
//UTF-8
//...

boolean canEncode ()

Tells whether or not this charset supports encoding.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.canEncode()); // true

final var encoded = charset.encode("abc♪");
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
System.out.println(Arrays.toString(encoded.array())); // [97, 98, 99, -30, -103, -86, 0, 0, 0]

final int compareTo (Charset that)

Compares this charset to another.

final var charset1 = StandardCharsets.UTF_8;
final var charset2 = StandardCharsets.ISO_8859_1;
final var charset3 = StandardCharsets.US_ASCII;

System.out.println(charset1.compareTo(charset1)); // 0
System.out.println(charset1.compareTo(charset2)); // 12
System.out.println(charset1.compareTo(charset3)); // 1

System.out.println(charset2.compareTo(charset3)); // -12

abstract boolean contains (Charset cs)

Tells whether or not this charset contains the given charset.

final var utf8 = StandardCharsets.UTF_8;
final var ascii = StandardCharsets.US_ASCII;

System.out.println(utf8.contains(ascii)); // true
System.out.println(ascii.contains(utf8)); // false

final CharBuffer decode (ByteBuffer bb)

Convenience method that decodes bytes in this charset into Unicode characters.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8

final var encoded = charset.encode("abc♪");
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
System.out.println(Arrays.toString(encoded.array())); // [97, 98, 99, -30, -103, -86, 0, 0, 0]

final var decoded = charset.decode(encoded);
System.out.println(decoded); // abc♪

static Charset defaultCharset ()

Returns the default charset of this Java virtual machine.

final var charset = Charset.defaultCharset();
System.out.println(charset); // UTF-8

String displayName ()

Returns this charset's human-readable name for the default locale.

final var utf8 = StandardCharsets.UTF_8;
System.out.println(utf8.displayName()); // UTF-8

final var ascii = StandardCharsets.US_ASCII;
System.out.println(ascii.displayName()); // US-ASCII

String displayName (Locale locale)

Returns this charset's human-readable name for the given locale.

System.out.println(Locale.getDefault().toLanguageTag()); // en-US

final var charset = StandardCharsets.UTF_8;
System.out.println(charset.displayName()); // UTF-8
System.out.println(charset.displayName(Locale.JAPAN)); // UTF-8

final ByteBuffer encode (String str)

Convenience method that encodes a string into bytes in this charset.

Please see the code example for decode(ByteBuffer bb), which also uses encode(String str).

final ByteBuffer encode (CharBuffer cb)

Convenience method that encodes Unicode characters into bytes in this charset.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8

final var cb = CharBuffer.wrap("abc♪");

final var encoded = charset.encode(cb);
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
System.out.println(Arrays.toString(encoded.array())); // [97, 98, 99, -30, -103, -86, 0, 0, 0]

final var decoded = charset.decode(encoded);
System.out.println(decoded); // abc♪

final boolean equals (Object ob)

Tells whether or not this object is equal to another.

final var charset1 = StandardCharsets.UTF_8;
final var charset2 = StandardCharsets.US_ASCII;

System.out.println(charset1.equals(charset1)); // true
System.out.println(charset1.equals(charset2)); // false

static Charset forName (String charsetName)

Returns a charset object for the named charset.

final var utf8 = Charset.forName("UTF-8");
System.out.println(utf8); // UTF-8
System.out.println(utf8.equals(StandardCharsets.UTF_8)); // true

final var sjis = Charset.forName("Shift_JIS");
System.out.println(sjis); // Shift_JIS

static Charset forName (String charsetName, Charset fallback)

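Returns a charset object for the named charset. If the named charset is not available or the name is not a legal charset name, the given fallback is returned instead.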

final var charset = Charset.forName("UTF-8", StandardCharsets.US_ASCII);
System.out.println(charset); // UTF-8
final var charset = Charset.forName("xxx", StandardCharsets.US_ASCII);
System.out.println(charset); // US-ASCII

final int hashCode ()

Computes a hashcode for this charset.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset.hashCode()); // 81070450
final var charset = StandardCharsets.UTF_16;
System.out.println(charset.hashCode()); // -1781783509
final var charset = StandardCharsets.US_ASCII;
System.out.println(charset.hashCode()); // -185735358

final boolean isRegistered ()

Tells whether or not this charset is registered in the IANA Charset Registry.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8
System.out.println(charset.isRegistered()); // true
final var charset = Charset.forName("x-windows-50220");
System.out.println(charset); // x-windows-50220
System.out.println(charset.isRegistered()); // false

static boolean isSupported (String charsetName)

Tells whether the named charset is supported.

System.out.println(Charset.isSupported("UTF-8")); // true
System.out.println(Charset.isSupported("US-ASCII")); // true
System.out.println(Charset.isSupported("Shift_JIS")); // true

System.out.println(Charset.isSupported("xxx")); // false

final String name ()

Returns this charset's canonical name.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset.name()); // UTF-8
final var charset = StandardCharsets.US_ASCII;
System.out.println(charset.name()); // US-ASCII
final var charset = Charset.forName("Shift_JIS");
System.out.println(charset.name()); // Shift_JIS

abstract CharsetDecoder newDecoder ()

Constructs a new decoder for this charset.

final var charset = StandardCharsets.UTF_8;
System.out.println(charset); // UTF-8

final var encoder = charset.newEncoder();
final var decoder = charset.newDecoder();

final var cb = CharBuffer.wrap("abc♪");

final var encoded = encoder.encode(cb);
System.out.println(encoded); // java.nio.HeapByteBuffer[pos=0 lim=6 cap=9]
System.out.println(Arrays.toString(encoded.array())); // [97, 98, 99, -30, -103, -86, 0, 0, 0]

final var decoded = decoder.decode(encoded);
System.out.println(decoded); // abc♪

abstract CharsetEncoder newEncoder ()

Constructs a new encoder for this charset.

Please see the code example for newDecoder(), which also uses newEncoder().

final String toString ()

Returns a string describing this charset.

final var charset = StandardCharsets.UTF_8;
final var ret = charset.toString();
System.out.println(ret); // UTF-8
final var charset = StandardCharsets.US_ASCII;
final var ret = charset.toString();
System.out.println(ret); // US-ASCII
final var charset = Charset.forName("Shift_JIS");
final var ret = charset.toString();
System.out.println(ret); // Shift_JIS

Related posts

To top of page