// 2007-04-30
#include <windows.h>
#include <richedit.h>
#include "WordLogic.h"

// Predicate type: tells whether the char at text[pos] belongs to one particular char class.
typedef bool (* ClassifyCharProc)( wchar_t* text, int pos );
// Word boundary searches; each one serves one action code of SgRichEdit_WordBreak.
static int PrevWordStartForMove( wchar_t* text, int textLength, int currentIndex );
static int NextWordStartForMove( wchar_t* text, int textLength, int currentIndex );
static int PrevWordStart( wchar_t* text, int textLength, int currentIndex );
static int NextWordStart( wchar_t* text, int textLength, int currentIndex );
static int PrevWordEnd( wchar_t* text, int textLength, int currentIndex );
static int NextWordEnd( wchar_t* text, int textLength, int currentIndex );

// Helpers that step over a single EOL code (CR, LF or CR+LF).
static int SkipBackwardOneEolCode( wchar_t* text, int textLength, int currentIndex );
static int SkipForwardOneEolCode( wchar_t* text, int textLength, int currentIndex );

// Char classification: class lookup followed by one predicate per char class.
static ClassifyCharProc GetCharClassifyProc( wchar_t* text, int pos );
static int GetCharClassId( wchar_t* text, int pos );
static bool IsAlphabet( wchar_t* text, int pos );
static bool IsDigit( wchar_t* text, int pos );
static bool IsPunct( wchar_t* text, int pos );
static bool IsEolCode( wchar_t* text, int pos );
static bool IsWhiteSpace( wchar_t* text, int pos );
static bool IsHiragana( wchar_t* text, int pos );
static bool IsKatakana( wchar_t* text, int pos );
static bool IsUnknown( wchar_t* text, int pos );


//-------------------------------------
// function : SgRichEdit_WordBreak
// brief :
//     Custom word break procedure for RichEdit20W window.
//     Forwards each action code to the helper that implements it.
// param text : text buffer to examine
// param pos : index in the buffer which the action refers to
// param textByteLength : length value given with the text
// param action : action code (one of WB_*)
// return : result of the requested action; 0 for an unhandled action code
//-------------------------------------
int CALLBACK
SgRichEdit_WordBreak( wchar_t* text, int pos, int textByteLength, int action )
{
	// sizeof(char) is 1 so the given length is handed to the helpers unchanged.
	// NOTE(review): confirm whether the control reports this length in bytes or in chars.
	const int length = textByteLength / sizeof(char);

	// get char-class and word break flags of the char
	if( action == WB_CLASSIFY )
		return GetCharClassId( text, pos );

	// checks whether the char is a delimiter
	if( action == WB_ISDELIMITER )
		return IsPunct( text, pos ) ? 1 : 0;

	// finds the beginning of a word to the left
	if( action == WB_LEFT )
		return PrevWordStart( text, length, pos );

	// finds the beginning of a word to the right
	if( action == WB_RIGHT )
		return NextWordStart( text, length, pos );

	// finds the end-of-word delimiter to the left
	if( action == WB_LEFTBREAK )
		return PrevWordEnd( text, length, pos );

	// finds the end-of-word delimiter to the right
	if( action == WB_RIGHTBREAK )
		return NextWordEnd( text, length, pos );

	// finds the beginning of a word to the left (used during Ctrl+Left key processing)
	if( action == WB_MOVEWORDLEFT )
		return PrevWordStartForMove( text, length, pos );

	// finds the beginning of a word to the right (used during Ctrl+Right key processing)
	if( action == WB_MOVEWORDRIGHT )
		return NextWordStartForMove( text, length, pos );

	// any other action code
	return 0;
}

//-------------------------------------
// function : PrevWordStartForMove
// brief :
//     Finds the start of the word to the left of currentIndex.
//     Basically this is the same logic as PrevWordStart
//     except that this does not treat an EOL code as a word.
//     The scan starts at the char just before currentIndex.
// return :
//     index of the first char of the word found,
//     or 0 if the scan runs off the beginning of the text
//-------------------------------------
static int
PrevWordStartForMove( wchar_t* text, int textLength, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	pos = currentIndex - 1;
	if( pos <= 0 )
		return 0;
	
	// skip white spaces
	// (text[0] is examined too; only running past it ends the scan)
	while( IsWhiteSpace(text, pos) )
	{
		pos--;
		if( pos < 0 )
			return 0;
	}

	// if an EOL code comes, return just before it
	if( IsEolCode(text, pos) )
	{
		// but if white spaces were already skipped, do not skip the EOL code
		if( pos != currentIndex-1 )
			return pos + 1;
		else
			return SkipBackwardOneEolCode( text, textLength, pos );
	}

	// proceed until the char category changes
	// The class of text[0] must be checked as well; otherwise a word
	// starting at index 1 would be reported as starting at index 0.
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos--;
		if( pos < 0 )
			return 0;
	}
	while( isSameClass(text, pos) );
	
	return pos + 1;
}

//-------------------------------------
// function : NextWordStartForMove
// brief :
//     Finds the start of the word to the right of currentIndex.
//     Basically this is the same logic as NextWordStart
//     except that this does not treat an EOL code as a word.
//     The scan starts at the char at currentIndex.
// return :
//     index of the first char of the next word,
//     or textLength if the scan reaches the end of the text
//-------------------------------------
static int
NextWordStartForMove( wchar_t* text, int textLength, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	// nothing to scan at or beyond the end of the text
	// (text[textLength] must not be read)
	pos = currentIndex;
	if( textLength <= pos )
		return textLength;
	
	// if an EOL code comes, return just after it
	if( IsEolCode(text, pos) )
	{
		return SkipForwardOneEolCode( text, textLength, pos );
	}

	// proceed until the char category changes
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}
	while( isSameClass(text, pos) );
	
	// skip white spaces
	while( IsWhiteSpace(text, pos) )
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}

	return pos;
}


//-------------------------------------
// function : PrevWordStart
// brief :
//     get start position of the previous word.
//     The scan starts at the char at currentIndex and goes backward.
// return :
//     index of the first char of the word found,
//     or 0 if the scan runs off the beginning of the text
// note :
//     The text length is not consulted here, so text[currentIndex]
//     must be readable by the time this is called.
//-------------------------------------
static int
PrevWordStart( wchar_t* text, int /*textLength*/, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	pos = currentIndex;
	if( pos <= 0 )
		return 0;
	
	// skip EOL codes and white spaces
	// (whatever text[0] is, the answer is 0 once index 0 is reached here)
	while( IsEolCode(text, pos) || IsWhiteSpace(text, pos) )
	{
		pos--;
		if( pos <= 0 )
			return 0;
	}

	// proceed until the char category changes
	// The class of text[0] must be checked as well; otherwise a word
	// starting at index 1 would be reported as starting at index 0.
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos--;
		if( pos < 0 )
			return 0;
	}
	while( isSameClass(text, pos) );
	
	return pos + 1;
}

//-------------------------------------
// function : PrevWordEnd
// brief :
//     get end position of the previous word.
//     The scan starts at the char at currentIndex and goes backward:
//     first over chars of the same class, then over EOL codes and white spaces.
// return :
//     index just after the last char of the previous word,
//     or 0 if the scan runs off the beginning of the text
// note :
//     The text length is not consulted here, so text[currentIndex]
//     must be readable by the time this is called.
//-------------------------------------
static int
PrevWordEnd( wchar_t* text, int /*textLength*/, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	pos = currentIndex;
	if( pos <= 0 )
		return 0;
	
	// proceed until the char category changes
	// (text[0] is examined too; only running past it ends the scan)
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos--;
		if( pos < 0 )
			return 0;
	}
	while( isSameClass(text, pos) );
	
	// skip EOL codes and white spaces
	// If text[0] is the last char of the previous word, the result must be 1,
	// so index 0 has to be tested here instead of being treated as the end of the scan.
	while( IsEolCode(text, pos) || IsWhiteSpace(text, pos) )
	{
		pos--;
		if( pos < 0 )
			return 0;
	}

	return pos + 1;
}

//-------------------------------------
// function : NextWordStart
// brief :
//     get start position of the next word.
//     The scan starts at the char just before currentIndex and goes forward:
//     first over chars of the same class, then over EOL codes and white spaces.
// return :
//     index of the first char of the next word,
//     or textLength if the scan reaches the end of the text
//-------------------------------------
static int
NextWordStart( wchar_t* text, int textLength, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	// There is no char before index 0, so start from the first char in that case
	// (text[-1] must not be read).
	pos = currentIndex - 1;
	if( pos < 0 )
		pos = 0;

	// nothing to scan at or beyond the end of the text
	// (text[textLength] must not be read)
	if( textLength <= pos )
		return textLength;
	
	// proceed until the char category changes
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}
	while( isSameClass(text, pos) );
	
	// skip EOL codes and white spaces
	while( IsEolCode(text, pos) || IsWhiteSpace(text, pos) )
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}
	
	return pos;
}

//-------------------------------------
// function : NextWordEnd
// brief :
//     get end position of the next word.
//     The scan starts at the char just before currentIndex and goes forward:
//     first over EOL codes and white spaces, then over chars of the same class.
// return :
//     index just after the last char of the next word,
//     or textLength if the scan reaches the end of the text
//-------------------------------------
static int
NextWordEnd( wchar_t* text, int textLength, int currentIndex )
{
	int pos;
	ClassifyCharProc isSameClass;
	
	// There is no char before index 0, so start from the first char in that case
	// (text[-1] must not be read).
	pos = currentIndex - 1;
	if( pos < 0 )
		pos = 0;

	// nothing to scan at or beyond the end of the text
	// (text[textLength] must not be read)
	if( textLength <= pos )
		return textLength;
	
	// skip EOL codes and white spaces
	while( IsEolCode(text, pos) || IsWhiteSpace(text, pos) )
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}

	// proceed until the char category changes
	isSameClass = GetCharClassifyProc( text, pos );
	do
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}
	while( isSameClass(text, pos) );
	
	return pos;
}

//-------------------------------------
// function : SkipForwardOneEolCode
// brief :
//     skip forward only one EOL code.
//     A CR immediately followed by a LF is skipped as a single EOL code.
// return :
//     index just after the EOL code which begins at currentIndex
//     (textLength at most), or currentIndex itself if no EOL code begins there
//-------------------------------------
static int
SkipForwardOneEolCode( wchar_t* text, int textLength, int currentIndex )
{
	int pos = currentIndex;
	wchar_t ch;
	
	ch = text[pos];
	if( ch == 0x0d ) // CR?
	{
		pos++;
		if( textLength <= pos )
			return textLength;
		
		// look at the char following the CR, not at the CR itself
		if( text[pos] == 0x0a ) // CR+LF?
		{
			pos++;
			if( textLength <= pos )
				return textLength;
		}
	}
	else if( ch == 0x0a ) // LF?
	{
		pos++;
		if( textLength <= pos )
			return textLength;
	}

	return pos;
}

//-------------------------------------
// function : SkipBackwardOneEolCode
// brief :
//     skip backward only one EOL code.
//     currentIndex points to the last char of the EOL code.
//     A LF immediately preceded by a CR is skipped as a single EOL code.
// return :
//     index of the char just before the EOL code (0 at least),
//     or currentIndex itself if the char there is not an EOL char
//-------------------------------------
static int
SkipBackwardOneEolCode( wchar_t* text, int textLength, int currentIndex )
{
	int pos = currentIndex;
	wchar_t ch;
	
	(void)textLength; // scanning backward never needs the text length
	
	ch = text[pos];
	if( ch == 0x0a ) // LF?
	{
		pos--;
		if( pos <= 0 )
			return 0;
		
		// when going backward the LF of a CR+LF pair comes first,
		// so the char before the LF is the one to test
		if( text[pos] == 0x0d ) // CR+LF?
		{
			pos--;
			if( pos <= 0 )
				return 0;
		}
	}
	else if( ch == 0x0d ) // CR?
	{
		pos--;
		if( pos <= 0 )
			return 0;
	}

	return pos;
}

//-------------------------------------
// function : GetCharClassifyProc
// brief :
//     Determines the class of the char at text[pos] and
//     gives back the predicate which recognizes that class.
// return :
//     the first predicate, in the order listed below, which accepts the char;
//     IsUnknown if none of them does
//-------------------------------------
static ClassifyCharProc
GetCharClassifyProc( wchar_t* text, int pos )
{
	// predicates in order of precedence; a char accepted by several of them
	// belongs to the class listed first
	static const ClassifyCharProc candidates[] = {
		&IsAlphabet,
		&IsDigit,
		&IsWhiteSpace,
		&IsPunct,
		&IsEolCode,
		&IsHiragana,
		&IsKatakana
	};
	const int candidateCount = sizeof(candidates) / sizeof(candidates[0]);
	int i;

	for( i = 0; i < candidateCount; i++ )
	{
		if( candidates[i](text, pos) )
			return candidates[i];
	}

	return &IsUnknown;
}

//-------------------------------------
// function : GetCharClassId
// brief : distinguish character class
//-------------------------------------
static int
GetCharClassId( wchar_t* text, int pos )
{
	ClassifyCharProc proc = GetCharClassifyProc( text, pos );

	if( proc == &IsAlphabet )
		return 1;
	else if( proc == &IsDigit )
		return 2;
	else if( proc == &IsWhiteSpace )
		return 3;
	else if( proc == &IsPunct )
		return 4;
	else if( proc == &IsEolCode )
		return 5;
	else if( proc == &IsHiragana )
		return 6;
	else if( proc == &IsKatakana )
		return 7;
	else /*if( proc == &IsUnknown )*/
		return 0;
}

//-------------------------------------
// function : IsAlphabet
// brief :
//     Tells whether the char at text[pos] is a half-width or
//     full-width alphabet. The underscore is accepted as well.
//-------------------------------------
static bool
IsAlphabet( wchar_t* text, int pos )
{
	const wchar_t c = text[pos];

	const bool underscore     = ( c == 0x5f );                  // '_'
	const bool halfWidthUpper = ( 0x41 <= c && c <= 0x5a );     // half-width alphabets (1)
	const bool halfWidthLower = ( 0x61 <= c && c <= 0x7a );     // half-width alphabets (2)
	const bool fullWidthUpper = ( 0xff21 <= c && c <= 0xff3a ); // full-width alphabets (1)
	const bool fullWidthLower = ( 0xff41 <= c && c <= 0xff5a ); // full-width alphabets (2)

	return underscore
		|| halfWidthUpper || halfWidthLower
		|| fullWidthUpper || fullWidthLower;
}

//-------------------------------------
// function : IsDigit
// brief :
//     Tells whether the char at text[pos] can be a part of a numeric literal:
//     half-width and full-width digits, '.' and some pre/postfix letters.
//-------------------------------------
static bool
IsDigit( wchar_t* text, int pos )
{
	const wchar_t c = text[pos];

	switch( c )
	{
		// some pre/postfixes of numeric literals
		case 0x66: // 'f'
		case 0x69: // 'i'
		case 0x6a: // 'j'
		case 0x6c: // 'l'
		case 0x78: // 'x'
			return true;

		// '.' for float literal
		case 0x2e:
			return true;

		default:
			break;
	}

	// half-width digits or full-width digits
	return ( 0x30 <= c && c <= 0x39 )
		|| ( 0xff10 <= c && c <= 0xff19 );
}

//-------------------------------------
// function : IsPunct
// brief :
//     Tells whether the char at text[pos] lies in one of the
//     code ranges which are regarded as punctuation marks.
//-------------------------------------
static bool
IsPunct( wchar_t* text, int pos )
{
	// inclusive code ranges: { first, last }
	static const int ranges[][2] = {
		{ 0x21,   0x2f   },
		{ 0x3a,   0x40   },
		{ 0x5b,   0x60   },
		{ 0x7b,   0x7f   },
		{ 0x3001, 0x303f }, // CJK punctuation marks
		{ 0xff01, 0xff0f }, // "Full width" forms (1)
		{ 0xff1a, 0xff20 }, // "Full width" forms (2)
		{ 0xff3b, 0xff40 }, // "Full width" forms (3)
		{ 0xff5b, 0xff65 }, // "Full width" forms (4)
		{ 0xffe0, 0xffee }  // "Full width" forms (5)
	};
	const int rangeCount = sizeof(ranges) / sizeof(ranges[0]);
	const wchar_t c = text[pos];
	int i;

	for( i = 0; i < rangeCount; i++ )
	{
		if( ranges[i][0] <= c && c <= ranges[i][1] )
			return true;
	}

	return false;
}

//-------------------------------------
// function : IsWhiteSpace
// brief :
//     Tells whether the char at text[pos] is a white space:
//     any code from 0x00 to 0x20 except the EOL chars, or the full-width space.
//-------------------------------------
static bool
IsWhiteSpace( wchar_t* text, int pos )
{
	const wchar_t c = text[pos];

	switch( c )
	{
		// EOL chars are not white spaces
		case 0x0a:
		case 0x0d:
			return false;

		// full-width space
		case 0x3000:
			return true;

		default:
			break;
	}

	return ( 0x00 <= c && c <= 0x20 );
}

//-------------------------------------
// function : IsEolCode
// brief : Tells whether the char at text[pos] is an EOL char (LF or CR).
//-------------------------------------
static bool
IsEolCode( wchar_t* text, int pos )
{
	switch( text[pos] )
	{
		case 0x0a: // LF
		case 0x0d: // CR
			return true;

		default:
			return false;
	}
}

//-------------------------------------
// function : IsHiragana
// brief :
//     Tells whether the char at text[pos] is in the range 0x3041 - 0x309f.
//     The code 0x30fc is accepted as well.
//-------------------------------------
static bool
IsHiragana( wchar_t* text, int pos )
{
	const wchar_t c = text[pos];
	const bool isExtraCode = ( c == 0x30fc );
	const bool isInRange = ( 0x3041 <= c && c <= 0x309f );

	return isExtraCode || isInRange;
}

//-------------------------------------
// function : IsKatakana
// brief : Tells whether the char at text[pos] is in the range 0x30a0 - 0x30ff.
//-------------------------------------
static bool
IsKatakana( wchar_t* text, int pos )
{
	const wchar_t c = text[pos];

	return ( 0x30a0 <= c && c <= 0x30ff );
}

//-------------------------------------
// function : IsUnknown
// brief :
//     Tells whether the char at text[pos] belongs to none of the
//     known char classes (alphabet, digit, white space, punctuation,
//     EOL code, hiragana, katakana).
//-------------------------------------
static bool
IsUnknown( wchar_t* text, int pos )
{
	const bool isKnown =
		   IsAlphabet( text, pos )
		|| IsDigit( text, pos )
		|| IsWhiteSpace( text, pos )
		|| IsPunct( text, pos )
		|| IsEolCode( text, pos )
		|| IsHiragana( text, pos )
		|| IsKatakana( text, pos );

	return !isKnown;
}

/**********************************************************
Copyright (C) 2006-2009 YAMAMOTO Suguru

This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.

Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
   claim that you wrote the original software. If you use this software
   in a product, an acknowledgment in the product documentation would be
   appreciated but is not required.

2. Altered source versions must be plainly marked as such, and must not be
   misrepresented as being the original software.

3. This notice may not be removed or altered from any source distribution.
**********************************************************/
