| Each character of text is represented by a value assigned 
              according to some encoding scheme. The particular type of encoding, 
              the number of bits and bytes required for the encoding, transformations 
              between encodings, and other issues thus become important, especially 
              for a language like Java that is aimed towards worldwide use. Encoding 
              becomes particularly relevant to I/O when text gets moved between 
              different systems with perhaps different encoding schemes.  So we give a brief overview of character encodings here.  The 7-bit ASCII code set is the most famous, 
              but there are many extended eight bit sets in which the first 128 
              codes are ASCII and the extra 128 codes provide symbols and characters 
              needed for other languages besides English.  For example, the ISO-Latin-1 set (ISO Standard 
              8859-1) provides characters for most West European languages and 
              for a few other languages such as Indonesian.  Java 
              itself is based on the 2-byte Unicode representation of characters. 
              The sixteen bits provide for a character set of 65,536 entries and 
              so allow for broad international use.  The first 256 entries in 2-byte Unicode are 
              identical to the ISO-Latin-1 set. That makes the 2-byte Unicode 
              inefficient for programs in English since the second byte is seldom 
              needed. Therefore, a scheme called UTF-8 is used to encode 
              text characters (e.g. string literals) for the Java class files.  The UTF code varies from 1 byte to 3 bytes. If a 
              byte begins with a 0 bit, then the lower 7 bits represent one of 
              the 128 ASCII characters. If the byte begins with the bits 110, 
              then it is the first of a two byte pair that represent the Unicode 
              values for 128 to 2047. If any byte begins with 1110, then it is 
              the first of a three byte set that can hold any of the other Unicode 
              values.  Thus, UTF trades the ability to use only one byte most 
              of the time for occasionally needing to use up to three bytes. For 
              text in English and many other languages, this is a good tradeoff 
              that can drastically reduce file size over those in strict Unicode. Java typically runs on platforms that use one byte 
              extended ASCII encoded characters. Therefore, text I/O with the 
              local platform, or with other platforms over the network, must convert 
              between the encodings. As we mentioned in the previous section, 
              the original one byte streams were not convenient for this so the 
              Reader/Writer 
              classes for two byte I/O were introduced.  The default encoding is typically ISO-Latin-1, but 
              your program can find the local encoding with the following static 
              method in the System: 
              String 
              local_encoding = System.getProperty ("file.encoding");  
               The encoding can be explicitly specified in some 
                cases via the constructor such as in the following file output:  FileOutputStream 
              out_file = new FileOutputStream ("Turkish.txt");OutputStreamWriter file_writer = new OutputStreamWriter (out_file, 
              "8859_3");
  A similar overloaded constructor is available for 
               InputStreamReader. 
              See the book by Harold for more information about character encoding 
              in Java. More about Unicode  If a character is not available on your keyboard, 
              it can be specified in a Java program by its Unicode value. This 
              value is represented with four hexadecimal digits preceded by the 
              "\u" escape sequence. For example, the "ö" character is given by 
              \u00F6 and 
              "è" by \u00E8. The program UnicodesApplet 
              shows examples of characters specified by their Unicode values and 
              drawn on the applet panel.  
              
                 
                  |  |   
                  |  import 
                      javax.swing.*;import java.awt.*;
 
 /** Unicode demo program. **/
 public class UnicodesApplet extends JApplet
 {
 
 public void init ()  {
 Container content_pane = getContentPane 
                      ();
 
 // Create an instance of DrawingPanel
 DrawingPanel drawing_panel = new 
                      DrawingPanel ();
 
 // Add the DrawingPanel to the content 
                      pane.
 content_pane.add (drawing_panel);
 
 } // init
 
 } // class UnicodesApplet
 
 /**
  * Display unicode characters.
  *
  * Each line drawn shows one accented Latin-1 character followed by
  * " = " and the text of the backslash-u escape that produces it.
  **/
 class DrawingPanel extends JPanel
 {
   /**
    * Paint the background, then draw the six sample lines.
    *
    * @param g graphics context to draw on
    **/
   @Override
   public void paintComponent (Graphics g) {
     // First paint background unless you will
     // paint whole area yourself.
     super.paintComponent (g);

     // In each literal the first escape is the character itself; the
     // doubled backslash makes the second one print as plain text.
     g.drawString ("\u00e5 = \\u00e5", 10, 12);
     g.drawString ("\u00c5 = \\u00c5", 10, 24);
     g.drawString ("\u00e4 = \\u00e4", 10, 36);
     g.drawString ("\u00c4 = \\u00c4", 10, 48);
     g.drawString ("\u00d6 = \\u00d6", 10, 60);
     g.drawString ("\u00f6 = \\u00f6", 10, 72);

   } // paintComponent

 } // class DrawingPanel
 
 |    Remember to differentiate clearly between the character 
              encoding and a font. A font is a specification of how a particular 
              character is displayed. On a given platform a character code will 
              either point to a known font for that code in the set of fonts available 
              on the system or to a default symbol indicating an unknown character. 
              See the applet below to see how the fonts appear on your platform 
              for a subset of Unicode values. We note finally that even the 65,536 entries of the 
              version of Unicode used by Java are not enough to encompass all 
              of the language characters and symbol sets in the world. Therefore, 
              Java will gradually transition to Unicode 4.0, which uses 32 bits. 
              This is a challenge for many reasons including the fact that the 
              char primitive is only 16-bit. Java 5.0 has some tools for dealing 
              with 32-bit supplementary characters but we don't have space here 
              to discuss them. We refer the reader to the article by Lindenberg 
              for further information on 32-bit character support in Java. References & Web 
              Resources Addendum: Font 
              Tables Applet As a bonus feature, we present the following applet 
              that displays the fonts available on your platform. The menu gives 
              a list of the font sets and selecting a font will display its 
              attributes in the middle text area. The bottom panel shows the characters 
              drawn for the first 256 Unicode values with the selected fonts. 
              The row value X and the column value Y correspond 
              to \u00XY Unicode values. Note that other fonts may be available 
              for other Unicode values.  
              
                 
                  |  |   
                  |  [Note: 
                      The font code array initial values in FontArea have 
                      been reduced in size to fit this page.]
 import 
                      java.awt.*;
 import javax.swing.*;
 import java.awt.event.*;
 import javax.swing.event.*;
 
 /** An applet to display the character tables
 * as function of text character and as function
 * of Unicode value.
 **/
 public class UnicodeFontsTables extends JApplet
 implements ItemListener
 {
 
 private JComboBox fFontChoice;
 private JComboBox fStyleChoice;
 private JComboBox fSizeChoice;
 private JTextArea fTextArea;
 private FontArea fArea;
 Font fFontPick;
 
 /** Set up the interface to display 
                      the fonts.  **/
 public void init () {
 
 // Create a control 
                      panel to select font family, style and size
 setLayout (new BorderLayout ());
 
 Panel choice_panel = new Panel ();
 choice_panel.setLayout (new FlowLayout 
                      (FlowLayout.LEFT));
 
 GraphicsEnvironment ge = GraphicsEnvironment.
 getLocalGraphicsEnvironment();
 
 String[] font_names = ge.getAvailableFontFamilyNames();
 String default_font_name = getFont 
                      ().getName ();
 
 fFontChoice = new JComboBox (font_names);
 fFontChoice.addItemListener (this);
 fFontChoice.setSelectedIndex (0);
 choice_panel.add (fFontChoice );
 
 String [] styles = {"PLAIN", "BOLD", 
                      "ITALIC", "BOLD ITALIC"};
 fStyleChoice = new JComboBox(styles);
 fStyleChoice.setSelectedIndex (0);
 fStyleChoice.addItemListener (this);
 choice_panel.add (fStyleChoice);
 
 String [] sizes = {"6", "8", "10", 
                      "12", "15", "20", "25"};
 fSizeChoice = new JComboBox (sizes);
 fSizeChoice.addItemListener (this);
 fSizeChoice.setSelectedIndex (3);
 choice_panel.add (fSizeChoice);
 
 add (BorderLayout.NORTH, choice_panel);
 
 // Text area will display fonts 
                      for various text characters
 fTextArea = new JTextArea ();
 fTextArea.setEditable (false);
 JScrollPane scroll_pane = new JScrollPane(fTextArea);
 add ("Center", scroll_pane );
 
 // Use a canvas to draw the characters 
                      as function
 // of Unicode value.
 fArea = new FontArea (this);
 add (BorderLayout.CENTER, fArea);
 
 browse ();
 }
 
 /** Event handler. **/
 public void itemStateChanged (ItemEvent evt) 
                      {
 browse ();
 }
 
 /**
 * Display a set of code values and 
                      the corresponding fonts for a
 * particular font choice.
 **/
 private void browse ()  {
 if(fTextArea == null) return;
 fTextArea.setText ("");
 String font_name = (String) (fFontChoice.getSelectedItem());
 if (font_name.equals (""))
 return;
 
 String styleStr = (String) (fStyleChoice.getSelectedItem 
                      ());
 int style;
 if (styleStr.equals ("PLAIN"))
 style = 
                      Font.PLAIN;
 else if (styleStr.equals ("BOLD"))
 style = 
                      Font.BOLD;
 else if (styleStr.equals ("ITALIC"))
 style = 
                      Font.ITALIC;
 else if (styleStr.equals ("BOLD 
                      ITALIC"))
 style = 
                      Font.BOLD | Font.ITALIC;
 else
 style = 
                      Font.PLAIN;
 
 String sizeStr = (String) (fSizeChoice.getSelectedItem 
                      ());
 int size = Integer.parseInt (sizeStr);
 
 Font font = new Font (font_name, 
                      style, size);
 
 fTextArea.setFont (font);
 fTextArea.append ("family: " + font.getFamily 
                      () + "\n");
 fTextArea.append ("name: " + font.getName 
                      () + "\n");
 fTextArea.append (
 "style:" 
                      +
 ( font.isPlain 
                      () ? " PLAIN" : "" ) +
 ( font.isBold 
                      () ? " BOLD" : "" ) +
 ( font.isItalic 
                      () ? " ITALIC" : "" ) +
 "\n" );
 fTextArea.append ("size: " + font.getSize 
                      () + "\n");
 fTextArea.append ("\n");
 FontMetrics fm = fTextArea.getFontMetrics 
                      (font);
 
 if (fm == null) return;
 
 fTextArea.append ("leading: " + 
                      fm.getLeading () + "\n");
 fTextArea.append ("ascent: " + fm.getAscent 
                      () + "\n");
 fTextArea.append ("descent: " + 
                      fm.getDescent () + "\n");
 fTextArea.append ("height: " + fm.getHeight 
                      () + "\n");
 fTextArea.append ("max ascent: " 
                      + fm.getMaxAscent () + "\n");
 fTextArea.append ("max descent: 
                      " + fm.getMaxDescent () + "\n");
 fTextArea.append ("max advance: 
                      " + fm.getMaxAdvance () + "\n");
 
 int [] widths = fm.getWidths ();
 boolean fixed = true;
 for (int i = 33; i <= 126; ++i ) 
                      {
 if (widths[i] 
                      != widths[32]) {
 fixed = false;
 break;
 }
 }
 if (fixed)
 fTextArea.append 
                      ("fixed width\n");
 else
 fTextArea.append 
                      ("variable width\n");
 
 fTextArea.append ("\n");
 fTextArea.append (" !\"#$%&' ()*+,-./0123456789:;<=>?\n");
 fTextArea.append ("@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_\n");
 fTextArea.append ("`abcdefghijklmnopqrstuvwxyz{|}~\n") 
                      ;
 
 fFontPick = font;
 repaint ();
 
 } // browse
 
 public static void main(String[] args)
 {
 
 UnicodeFontsTables applet 
                      = new UnicodeFontsTables();
 
 // Following anonymous 
                      class used to close window & exit program
 JFrame f = new JFrame("Unicodes 
                      & Fonts");
 // Set mode for closing 
                      the frame via the window exit button.
 f.setDefaultCloseOperation 
                      (JFrame.EXIT_ON_CLOSE);
 
 f.getContentPane().add(applet);
 f.setSize(new Dimension(500,800));
 applet.init();
 f.setVisible(true);
 
 } // main
 
 } // class UnicodeFontsTables
 
 /** The panel on which the font table is displayed. **/
 class FontArea extends JPanel
 {
 
 // Table of Unicode values for Latin codes.
 // u\0022 = " is skipped since it is interpreted 
                      as end of string.
 // u-005C = \ also skipped since it causes the 
                      next \ u to be
 //          interpreted 
                      as \ and then a u
 //          rather 
                      than as a single escape character.
 // u\000d = Caused string not terminated error 
                      using 1.2 compiler.
 //          
                      So substituted 000c.- Mar 25,1999.
 
 String[] fs = {
 "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u0009\u000b\u000c\u000c\u000e\u000f 
                      ",
 "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f 
                      ",
 "\u0020\u0021\u0021\u0023\u0024\u0025\u0026\u0027\u0028\u0029\u002a\u002b\u002c\u002d\u002e\u002f 
                      ",
 "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039\u003a\u003b\u003c\u003d\u003e\u003f 
                      ",
 "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f 
                      ",
 "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a\u005b\u005d\u005d\u005e\u005f 
                      ",
 "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f 
                      ",
 "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u007b\u007c\u007d\u007e\u007f 
                      ",
 "\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f 
                      ",
 "\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f 
                      ",
 "\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af 
                      ",
 "\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf 
                      ",
 "\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf 
                      ",
 "\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df 
                      ",
 "\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef 
                      ",
 "\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff 
                      "
 };
 
 UnicodeFontsTables fParent;
 
 FontArea (UnicodeFontsTables f) {
 fParent = f;
 setBackground (Color.blue);
 // Set the " and / characters directly 
                      here.
 char [] ca = fs[2].toCharArray ();
 ca[2] = '\u0022';
 fs[2] = new String (ca);
 ca = fs[5].toCharArray ();
 ca[12] = '\u005C\u005C';
 fs[5]  = new String (ca);
 } // ctor
 
 /** Use the Panel to display the fonts for the 
                      Unicode table. **/
 public void paintComponent (java.awt.Graphics 
                      graphics) {
 
 // Use the Courier fonts for the 
                      row-column numbering
 Font std_font = new Font ("Courier", 
                      Font.BOLD,
 fParent.fFontPick.getSize ());
 
 // Get various setup parameters 
                      for making the table
 int wc =  (getSize ().width)/18;
 graphics.setFont (std_font );
 FontMetrics tm = graphics.getFontMetrics 
                      ();
 int numw = tm.stringWidth ("FF");
 int hs = tm.getHeight ();
 
 // Draw the column numbers along 
                      top
 int y = 20;
 int x = numw+2;
 char [] ca = new char[1];
 for (int j=0; j<16; j++)  {
 ca = Integer.toHexString 
                      (j).toCharArray ();
 graphics.drawChars 
                      (ca,0,1,x,y);
 x += wc;
 }
 
 // Draw the row number and then 
                      the characters.
 y = 40;
 for (int i=0; i < 16; i++) {
 graphics.setFont 
                      (std_font );
 graphics.drawString 
                      (Integer.toHexString (16*i),2,y);
 graphics.setFont 
                      (fParent.fFontPick );
 ca = fs[i].toCharArray 
                      ();
 // graphics.drawString 
                      (fs[i],20,y);
 x = numw 
                      + 5;
 for (int 
                      j=0; j < 16; j++) {
 graphics.drawChars 
                      (ca,j,1,x,y);
 x 
                      += wc;
 }
 y +=  (hs 
                      + 2);
 }
 } // paintComponent
 
 } // class FontArea
 |    Latest update: Nov. 13, 2004 |