#!/usr/local/bin/python
# ex:ts=4
#
# Unicode hangul abstractive controller
#
#   written by Hye-Shik Chang <perky@fallin.lv>
#
# Unicode Hangul Code-Area Specifications:
#  http://www.unicode.org/charts/PDF/UAC00.pdf
#
# ---------------------------------------------------------------------------- 
# "THE BEER-WARE LICENSE" (Revision 42): 
# <perky@fallin.lv> wrote this file. As long as you retain this notice you 
# can do whatever you want with this stuff. If we meet some day, and you think 
# this stuff is worth it, you can buy me a beer in return. Hye-Shik Chang
# ---------------------------------------------------------------------------- 
#
# $LinuxKorea: UnicodeHangul.py,v 1.3 2001/08/04 05:51:29 perky Exp $'
# $Id: hangul.py,v 1.1.1.1 2002/02/16 00:51:13 perky Exp $
#

class UnicodeHangulError(Exception):
	"""Error raised by this module when an argument is not acceptable.

	The text passed to the constructor is kept in ``msg`` and is what both
	``str()`` and ``repr()`` of the exception return.
	"""

	def __init__(self, msg):
		self.msg = msg

	def __str__(self):
		return self.msg

	# repr() intentionally shows the bare message as well
	__repr__ = __str__

# Empty unicode string used as the placeholder for an absent jamo
# (it is the first entry of Jaeum.Jongsung and fills the unused slots
# of the lists returned by split()).
Null = u''

class Jaeum: # XXX: 1100-1159 Old Jaeum need?
	"""Hangul compatibility consonant letters, U+3131 through U+314E.

	Every letter is available as a class attribute, and ``Codes`` lists
	all of them in code-point order.  ``Chosung`` and ``Jongsung`` are the
	ordered tables used for the first and third position of a syllable,
	and ``MultiElement`` maps each compound consonant to the simple
	letters it is written with.
	"""

	# One attribute per letter, in code-point order
	G = u'\u3131'
	GG = u'\u3132'
	GS = u'\u3133'
	N = u'\u3134'
	NJ = u'\u3135'
	NH = u'\u3136'
	D = u'\u3137'
	DD = u'\u3138'
	R = u'\u3139'
	RG = u'\u313a'
	RM = u'\u313b'
	RB = u'\u313c'
	RS = u'\u313d'
	RT = u'\u313e'
	RP = u'\u313f'
	RH = u'\u3140'
	M = u'\u3141'
	B = u'\u3142'
	BB = u'\u3143'
	BS = u'\u3144'
	S = u'\u3145'
	SS = u'\u3146'
	A = u'\u3147'
	J = u'\u3148'
	JJ = u'\u3149'
	CH = u'\u314a'
	K = u'\u314b'
	T = u'\u314c'
	P = u'\u314d'
	H = u'\u314e'

	Codes = (G, GG, GS, N, NJ, NH, D, DD, R, RG, RM, RB, RS, RT, RP, RH,
			 M, B, BB, BS, S, SS, A, J, JJ, CH, K, T, P, H)
	Width = len(Codes)

	# Position in these lists is the index used in the syllable arithmetic,
	# so their order must not change.
	Chosung = [
		G, GG, N, D, DD, R, M, B, BB, S,
		SS, A, J, JJ, CH, K, T, P, H,
	]
	Jongsung = [
		Null, G, GG, GS, N, NJ, NH, D, R, RG,
		RM, RB, RS, RT, RP, RH, M, B, BS, S,
		SS, A, J, CH, K, T, P, H,
	]

	# Compound consonant -> the simple consonants it is made of
	MultiElement = {
		GG: (G, G),
		GS: (G, S),
		NJ: (N, J),
		NH: (N, H),
		DD: (D, D),
		RG: (R, G),
		RM: (R, M),
		RB: (R, B),
		RS: (R, S),
		RT: (R, T),
		RP: (R, P),
		RH: (R, H),
		BB: (B, B),
		BS: (B, S),
		SS: (S, S),
		JJ: (J, J),
	}


class Moeum: # XXX: 1161-117f Old Moeum need?
	"""Hangul compatibility vowel letters, U+314F through U+3163.

	Every letter is available as a class attribute, and ``Codes`` lists
	all of them in code-point order.  ``Jungsung`` is the ordered table
	used for the middle position of a syllable, and ``MultiElement`` maps
	each compound vowel to the letters it is written with.
	"""

	# One attribute per letter, in code-point order
	A = u'\u314f'
	AE = u'\u3150'
	YA = u'\u3151'
	YAE = u'\u3152'
	EO = u'\u3153'
	E = u'\u3154'
	YEO = u'\u3155'
	YE = u'\u3156'
	O = u'\u3157'
	WA = u'\u3158'
	WAE = u'\u3159'
	WOE = u'\u315a'
	YO = u'\u315b'
	OO = u'\u315c'
	WO = u'\u315d'
	WE = u'\u315e'
	WI = u'\u315f'
	YU = u'\u3160'
	EU = u'\u3161'
	EUI = u'\u3162'
	I = u'\u3163'

	Codes = (A, AE, YA, YAE, EO, E, YEO, YE, O, WA, WAE,
			 WOE, YO, OO, WO, WE, WI, YU, EU, EUI, I)
	Width = len(Codes)

	# Position in this list is the index used in the syllable arithmetic.
	Jungsung = list(Codes)

	# Compound vowel -> the letters it is made of.
	# NOTE(review): E has no entry here although AE, YAE and YE do, so WE
	# stops at (OO, E) -- confirm whether that is intentional.
	MultiElement = {
		AE: (A, I),
		YAE: (YA, I),
		YE: (YEO, I),
		WA: (O, A),
		WAE: (O, A, I),
		WOE: (O, I),
		WO: (OO, EO),
		WE: (OO, E),
		WI: (OO, I),
		EUI: (EU, I),
	}


# Aliases for your convenience
Chosung = Jaeum.Chosung
Jungsung = Moeum.Jungsung
Jongsung = Jaeum.Jongsung

def isJaeum(c):
	""" Tell whether c is one of the consonant letters in Jaeum.Codes """
	return c in Jaeum.Codes

def isMoeum(c):
	""" Tell whether c is one of the vowel letters in Moeum.Codes """
	return c in Moeum.Codes

# Unicode Hangul Syllables Characteristics

# First and last code point of the precomposed syllable range
zone = (u'\uAC00', u'\uD7A3')

# One (weight, table) pair per syllable position, most significant first:
# a syllable's offset from zone[0] is the sum of weight * index-in-table.
splitters = [
	(len(Jungsung) * len(Jongsung), Chosung),
	(len(Jongsung), Jungsung),
	(1, Jongsung),
]

def ishangul(code):
	""" Tell whether code is a hangul syllable or a known jamo letter

	True when code lies between zone[0] and zone[1] inclusive, or is
	listed in Jaeum.Codes or Moeum.Codes.  Meant for a single character:
	the range test is an ordinary string comparison, so a longer string
	that starts with a syllable passes it as well.
	"""
	if zone[0] <= code <= zone[1]:
		return True
	return code in Jaeum.Codes or code in Moeum.Codes

def join(codes):
	""" Join function which makes hangul syllable from jamos

	codes is a 3-element sequence (chosung, jungsung, jongsung); a list,
	a tuple or any other indexable sequence is accepted and it is not
	modified.  Use Null for an absent jongsung.

	When the chosung or the jungsung is empty the other one is returned
	as is (a lone jamo) and the jongsung is ignored.

	Raises UnicodeHangulError if codes does not hold exactly 3 items, and
	ValueError (from list.index) if an item is not valid for its position.
	"""
	# Compare by value: `is not 3` only worked through int caching.
	if len(codes) != 3:
		raise UnicodeHangulError("needs 3-element tuple")
	if not codes[0] or not codes[1]: # single jamo
		return codes[0] or codes[1]

	# Walk the positions in parallel with their (weight, table) pair
	# instead of copying and pop()ing, which failed for tuples.
	r = ord(zone[0])
	for (multiplier, codeset), jamo in zip(splitters, codes):
		r = r + multiplier * codeset.index(jamo)

	return unichr(r)

def split(code):
	""" Split function which splits hangul syllable into jamos

	code must be exactly one character accepted by ishangul().  Returns a
	3-element list [chosung, jungsung, jongsung]; positions that do not
	apply hold Null.  A lone consonant comes back in the first slot and a
	lone vowel in the second.

	Raises UnicodeHangulError for anything that is not a single hangul
	letter.
	"""
	if len(code) != 1 or not ishangul(code):
		raise UnicodeHangulError("needs 1 hangul letter")
	if code in Jaeum.Codes:
		return [code, Null, Null]
	if code in Moeum.Codes:
		return [Null, code, Null]

	offset = ord(code) - ord(zone[0])
	jamos = []
	for divider, codeset in splitters:
		# divmod keeps the index an integer even where `/` is true
		# division (Python 3, or `from __future__ import division`).
		index, offset = divmod(offset, divider)
		jamos.append(codeset[index])
	return jamos

def dividestring(str, intoelements=0):
	""" Replace every hangul letter of a unicode string by its jamos

	str          -- unicode string to process; other characters are copied
	                through unchanged.
	intoelements -- when true, compound jamos are additionally replaced by
	                the letters listed for them in Jaeum.MultiElement or
	                Moeum.MultiElement (one level only).

	Returns a new unicode string.  Raises UnicodeHangulError if str is not
	a unicode string.
	"""
	if not isinstance(str, type(u'')):
		raise UnicodeHangulError("needs unicode string")

	# Collect the pieces and join once instead of growing a string.
	pieces = []
	for char in str:
		if not ishangul(char):
			pieces.append(char)
			continue
		for elem in split(char):
			# Consonant table is consulted before the vowel table;
			# `in` replaces the deprecated dict.has_key().
			if intoelements and elem in Jaeum.MultiElement:
				pieces.extend(Jaeum.MultiElement[elem])
			elif intoelements and elem in Moeum.MultiElement:
				pieces.extend(Moeum.MultiElement[elem])
			else:
				pieces.append(elem)

	return u''.join(pieces)


if __name__ == '__main__':

	# Smoke test: compose three syllables and print them as UTF-8.
	print((join([Jaeum.P, Moeum.EO, Null]) +
		   join([Jaeum.K, Moeum.I, Null]) +
		   join([Jaeum.JJ, Moeum.A, Jaeum.A])).encode("utf-8"))

	# Interactive loop: read a UTF-8 line and print it fully decomposed.
	while 1:
		try:
			code = raw_input(">>> ")
		except (EOFError, KeyboardInterrupt):
			# End of input or Ctrl-C: leave quietly instead of dying
			# with a traceback.
			break
		print(dividestring(unicode(code, "utf-8"), 1).encode("utf-8"))

