Recently, I had to write a Java app that outputs Unicode text to the console. After fiddling a bit, it became obvious that the Windows command prompt doesn’t support this out of the box.
After some googling, I came across this, which seems the best solution I can get.
I came up with this C++ JNI library:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 | #include <Windows.h> #include <jni.h> #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) static jboolean printUnicode(JNIEnv *env, jobject obj, jstring message) { DWORD written; jboolean isCopy; BOOL ret; jint strLength = env->GetStringLength(message); HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); if (out == INVALID_HANDLE_VALUE) return false; const jchar* str = env->GetStringChars(message, &isCopy); ret = WriteConsoleW(out, str, strLength, &written, NULL); env->ReleaseStringChars(message, str); return ret; } /* Based upon http://blogs.msdn.com/b/michkap/archive/2010/04/07/9989346.aspx */ static jboolean consoleHasTrueTypeFont(JNIEnv *env, jobject obj) { /* Check if STDOUT is mapped to a console */ HANDLE out = GetStdHandle(STD_OUTPUT_HANDLE); if (out == INVALID_HANDLE_VALUE) return false; DWORD filetype = GetFileType(out); if (!(filetype == FILE_TYPE_UNKNOWN && GetLastError() != ERROR_SUCCESS)) { DWORD mode; filetype &= ~(FILE_TYPE_REMOTE); if (filetype == FILE_TYPE_CHAR) { BOOL retval = GetConsoleMode(out, &mode); if (!retval && GetLastError() == ERROR_INVALID_HANDLE) { return false; } } else { return false; } } /* Check if console uses a TrueType font */ CONSOLE_FONT_INFOEX cfie; cfie.cbSize = sizeof(cfie); if (GetCurrentConsoleFontEx(out, false, &cfie)) return cfie.FontFamily & TMPF_TRUETYPE; else return false; } static JNINativeMethod nativeMethods[] = { {"printUnicode", "(Ljava/lang/String;)Z", printUnicode}, {"consoleHasTrueTypeFont", "()Z", consoleHasTrueTypeFont}, }; extern "C" JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) { JNIEnv* env = NULL; jint result = -1; if (vm->GetEnv((void**) &env, JNI_VERSION_1_4) != JNI_OK) return result; jclass clazz = env->FindClass("WindowsUnicodeOutput"); /* Change this to the appropriate class 
and/or package! */ if (clazz == NULL) return -1; if (env->RegisterNatives(clazz, nativeMethods, ARRAY_SIZE(nativeMethods)) < 0) return -1; return JNI_VERSION_1_4; } |
import java.io.IOException;
import java.io.Writer;

/**
 * A {@link Writer} that prints text to the Windows console through a native
 * library when the console uses a TrueType font, and through
 * {@code System.out} otherwise.
 *
 * <p>The native library is looked up as a class-path resource named
 * {@code WinUnicodeLib_<os.arch>.dll} next to this class. If it is missing,
 * cannot be loaded, or reports that the console has no TrueType font, a
 * warning is printed and all output goes to {@code System.out}.
 */
public class WindowsUnicodeOutput extends Writer {
    /** Whether output is sent through the native console writer. */
    private final boolean hasTrueType;

    /**
     * Loads the native library and checks the console font. Never throws when
     * the library is unavailable; the writer then uses {@code System.out}.
     */
    public WindowsUnicodeOutput() {
        boolean nativeConsole = false;
        try {
            String resourceName =
                    String.format("WinUnicodeLib_%s.dll", System.getProperty("os.arch"));
            java.net.URL location = getClass().getResource(resourceName);
            if (location != null) {
                // Convert through a URI: URL.getFile() keeps percent-escapes
                // (for example %20), which is not a loadable file-system path.
                System.load(new java.io.File(location.toURI()).getAbsolutePath());
                nativeConsole = consoleHasTrueTypeFont();
            }
        } catch (Exception ex) {
            // Unusable resource location or access denied: use the fallback.
            nativeConsole = false;
        } catch (LinkageError ex) {
            // Library could not be loaded or its natives are not bound: use the fallback.
            nativeConsole = false;
        }
        hasTrueType = nativeConsole;

        if (!hasTrueType) {
            System.out.println("WARNING: your console does not use a TrueType font, you won't be able to view all characters correctly!");
        }
    }

    /** Flushes pending fallback output; the underlying console is left open. */
    @Override
    public void close() throws IOException {
        flush();
    }

    /**
     * Flushes {@code System.out} when it is the active target. Native console
     * writes are not buffered by this class, so there is nothing to flush then.
     */
    @Override
    public void flush() throws IOException {
        if (!hasTrueType) {
            System.out.flush();
        }
    }

    /**
     * Writes {@code len} characters of {@code cbuf} starting at {@code off}.
     *
     * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not
     *         describe a valid range of {@code cbuf}
     * @throws IOException if the native console write reports failure
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        String str = new String(cbuf, off, len);
        if (str.length() == 0) {
            return;
        }
        if (!hasTrueType) {
            System.out.print(str);
            return;
        }
        if (!printUnicode(str)) {
            throw new IOException("Could not write to the Windows console");
        }
    }

    /** Writes the message to the console; returns {@code false} on failure. */
    private native boolean printUnicode(String message);

    /** Reports whether standard output is a console using a TrueType font. */
    private native boolean consoleHasTrueTypeFont();
}
This allows you to simply do:
1 2 3 4 5 | PrintWriter output = new PrintWriter(System.out); if (System.getProperty("os.name").toLowerCase().contains("win")) output = new PrintWriter(new WindowsUnicodeOutput()); output.println("\u0627\u0628\u0629 \u043a\u043e\u0448\u043a\u0430 \u65e5\u672c\u56fd"); |
And voila! You should have working Unicode characters on a Windows command prompt (given that you’ve chosen a TrueType font)!
P.S.: try downloading DejaVu Sans Mono and adding it as a string value named 000 under HKLM\Software\Microsoft\Windows NT\CurrentVersion\Console\TrueTypeFont for a wider range of Unicode glyphs.