/*
	Farsi Converter Class
	ActionScript 2.0

	Converts Persian (Farsi) text into its contextual presentation-form glyphs,
	reverses each line so it reads right-to-left in a left-to-right text field,
	and inserts manual line breaks, which enables Persian characters in embedded
	dynamic text boxes.

	Version 1.0
	Fouad Amiri
	http://fouad.ir
	fouad@parspake.com
	16 March 2009

	Possible improvements:
		We can improve the line break intelligence.

	Example:
		var farsi_text:farsi_converter = new farsi_converter();
		my_txt.text = farsi_text.text_to_hex(my_txt.text, 40);
*/
class farsi_converter {

	// Presentation forms, parallel to farsi_uni. Each entry is
	// [isolated, final, medial, initial]; '\0' marks a form the table does not provide.
	private var farsi_hex:Array;

	// Decimal character codes (stored as strings) of every character the class treats as Persian.
	private var farsi_uni:Array;

	// Decimal character codes (stored as strings) that do not join to the following letter.
	private var farsi_separators:Array;

	/*
		Builds the three lookup tables.
		farsi_uni[i] and farsi_hex[i] describe the same character, so the two
		arrays must stay the same length and in the same order.
	*/
	function farsi_converter() {
		this.farsi_separators = [
			"1570", "1571", "1572", "1573", "1575", "1577",
			"1583", "1584", "1585", "1586",
			"1595", "1596", "1597", "1598", "1599", "1600",
			"1608", "1688"
		];

		this.farsi_uni = [
			"1570", "1571", "1572", "1573", "1574", "1575", "1576", "1577", "1578", "1579",
			"1580", "1581", "1582", "1583", "1584", "1585", "1586", "1587", "1588", "1589",
			"1590", "1591", "1592", "1593", "1594", "1595", "1596", "1597", "1598", "1599",
			"1600", "1601", "1602", "1603", "1604", "1605", "1606", "1607", "1608", "1609",
			"1610", "1611", "1662", "1670", "1711", "1688", "1705", "1740"
		];

		this.farsi_hex = [
			['\ufe81', '\ufe82', '\0', '\0'],         // 1570
			['\ufe83', '\ufe84', '\0', '\0'],         // 1571
			['\ufe85', '\ufe86', '\0', '\0'],         // 1572
			['\ufe87', '\ufe88', '\0', '\0'],         // 1573
			['\ufe89', '\ufe8a', '\ufe8c', '\ufe8b'], // 1574 Yeh with Hamza (initial form is FE8B)
			['\ufe8d', '\ufe8e', '\0', '\0'],         // 1575 Alef
			['\ufe8f', '\ufe90', '\ufe92', '\ufe91'], // 1576 Beh
			['\ufe93', '\ufe94', '\0', '\0'],         // 1577 Teh Marbuteh
			['\ufe95', '\ufe96', '\ufe98', '\ufe97'], // 1578 Teh
			['\ufe99', '\ufe9a', '\ufe9c', '\ufe9b'], // 1579 Seh (Theh)
			['\ufe9d', '\ufe9e', '\ufea0', '\ufe9f'], // 1580 Jim
			['\ufea1', '\ufea2', '\ufea4', '\ufea3'], // 1581 Heh
			['\ufea5', '\ufea6', '\ufea8', '\ufea7'], // 1582 Kheh
			['\ufea9', '\ufeaa', '\0', '\0'],         // 1583 Dal
			['\ufeab', '\ufeac', '\0', '\0'],         // 1584 Zal (Thal)
			['\ufead', '\ufeae', '\0', '\0'],         // 1585 Reh
			['\ufeaf', '\ufeb0', '\0', '\0'],         // 1586 Zeh
			['\ufeb1', '\ufeb2', '\ufeb4', '\ufeb3'], // 1587 Sin
			['\ufeb5', '\ufeb6', '\ufeb8', '\ufeb7'], // 1588 Shin
			['\ufeb9', '\ufeba', '\ufebc', '\ufebb'], // 1589 Sad
			['\ufebd', '\ufebe', '\ufec0', '\ufebf'], // 1590 Zad (Dad)
			['\ufec1', '\ufec2', '\ufec4', '\ufec3'], // 1591 Ta
			['\ufec5', '\ufec6', '\ufec8', '\ufec7'], // 1592 Za
			['\ufec9', '\ufeca', '\ufecc', '\ufecb'], // 1593 Ain
			['\ufecd', '\ufece', '\ufed0', '\ufecf'], // 1594 Ghain
			['\0', '\0', '\0', '\0'],                 // 1595 (no glyph in this table)
			['\0', '\0', '\0', '\0'],                 // 1596 (no glyph in this table)
			['\0', '\0', '\0', '\0'],                 // 1597 (no glyph in this table)
			['\0', '\0', '\0', '\0'],                 // 1598 (no glyph in this table)
			['\0', '\0', '\0', '\0'],                 // 1599 (no glyph in this table)
			['\0', '\0', '\0', '\0'],                 // 1600 (no glyph in this table)
			['\ufed1', '\ufed2', '\ufed4', '\ufed3'], // 1601 Feh
			['\ufed5', '\ufed6', '\ufed8', '\ufed7'], // 1602 Ghaf
			['\ufed9', '\ufeda', '\ufedc', '\ufedb'], // 1603 Kaf
			['\ufedd', '\ufede', '\ufee0', '\ufedf'], // 1604 Lam
			['\ufee1', '\ufee2', '\ufee4', '\ufee3'], // 1605 Mim
			['\ufee5', '\ufee6', '\ufee8', '\ufee7'], // 1606 Noon
			['\ufee9', '\ufeea', '\ufeec', '\ufeeb'], // 1607 Heh
			['\ufeed', '\ufeee', '\0', '\0'],         // 1608 Vav
			['\ufeef', '\ufef0', '\ufef4', '\ufef3'], // 1609 Yeh (Alef Maksura)
			['\ufef1', '\ufef2', '\ufef4', '\ufef3'], // 1610 Yeh
			['\ufef1', '\ufef2', '\ufef4', '\ufef3'], // 1611 NOTE(review): mapped to the Yeh glyphs; confirm this is intended
			['\ufb56', '\ufb57', '\ufb59', '\ufb58'], // 1662 Peh
			['\ufb7a', '\ufb7b', '\ufb7d', '\ufb7c'], // 1670 Che
			['\ufb92', '\ufb93', '\ufb95', '\ufb94'], // 1711 Gaf
			['\ufb8a', '\ufb8b', '\0', '\0'],         // 1688 Zhe
			['\ufed9', '\ufeda', '\ufedc', '\ufedb'], // 1705 Farsi Kaf
			['\ufeef', '\ufef0', '\ufef4', '\ufef3']  // 1740 Farsi Yeh
		];
	}

	/*
		Looks a character code up in farsi_uni.
		The table holds strings, so the comparison relies on the == operator
		converting between Number and String.
		@return: the index of the code in farsi_uni (and farsi_hex), or -1 if it is not listed
	*/
	private function index_of_code(char_code):Number {
		var count:Number = this.farsi_uni.length;
		for (var i:Number = 0; i < count; i++) {
			if (char_code == this.farsi_uni[i]) {
				return i;
			}
		}
		return -1;
	}

	/*
		Some characters act like a separator in the Persian language: the letter
		that follows them is not joined to them. These are all non-Persian
		characters (including punctuation marks) and the 18 character codes listed
		in the farsi_separators array.
		@return: true, if the character acts like a separator
	*/
	private function is_separator(char):Boolean {
		if (this.index_of_code(char) == -1) {
			return true; // Not listed in farsi_uni at all
		}
		var count:Number = this.farsi_separators.length;
		for (var i:Number = 0; i < count; i++) {
			if (char == this.farsi_separators[i]) {
				return true;
			}
		}
		return false;
	}

	/*
		Detects whether the character is a punctuation mark or any other
		character that is not listed in farsi_uni.
		@return: true, if the character code is not in the farsi_uni table
	*/
	private function is_none_persian(char:Number):Boolean {
		return this.index_of_code(char) == -1;
	}

	/*
		Detects which form of a Persian letter is needed at position char_at.
		Letters have 2 or 4 forms: alone, at the end of a word, in the middle of
		a word, or at the beginning of a word. Letters like ب have all 4 forms and
		letters like د only have the first 2.
		Positions outside the string are treated as non-Persian neighbours.
		@return: 0 = alone, 1 = end, 2 = middle, 3 = beginning
		         (the index into a farsi_hex entry)
	*/
	private function type_detector(input:String, char_at:Number):Number {
		// The previous character joins to this one unless it is a separator.
		var prev_joins:Boolean = !this.is_separator(input.charCodeAt(char_at - 1));

		if (this.is_separator(input.charCodeAt(char_at))) {
			// A letter that never joins forward only has the alone and end forms,
			// so the following character does not matter.
			return prev_joins ? 1 : 0;
		}

		// Any character listed in farsi_uni can be joined from the previous letter.
		var next_joins:Boolean = !this.is_none_persian(input.charCodeAt(char_at + 1));

		if (prev_joins) {
			return next_joins ? 2 : 1; // Middle : End
		}
		return next_joins ? 3 : 0;     // Beginning : Alone
	}

	/*
		The main and the only public function.
		Returns the text with every Persian letter replaced by its contextual
		glyph. Each output line holds its characters in reverse order, and lines
		are separated by "\n".
		Note that the whole line is reversed, including any non-Persian
		characters in it.
		@params:
			input: the normal Persian text
			step:  the minimum number of characters per line. Lines are broken
			       unexpectedly in Persian texts, so a line is closed at the
			       first non-Persian character (space, punctuation mark, ...)
			       found after more than `step` characters have been read.
		@return: the converted text, or "" if input is null, undefined or empty
	*/
	public function text_to_hex(input:String, step:Number):String {
		if (input == null || input.length == 0) {
			return "";
		}

		// One padding space on each side gives the first and last letter a
		// neighbour to be compared with. The padding is never copied to the output.
		var padded:String = " " + input + " ";
		var len:Number = padded.length;
		var last:Number = len - 1;

		var line_chars:Array = new Array();
		var lines:Array = new Array();
		var step_counter:Number = 0;
		var code:Number;

		for (var i:Number = 0; i < len; i++) {
			step_counter++;
			code = padded.charCodeAt(i);

			if (i != 0 && i != last) {
				line_chars.push(this.find_char(padded, i, code, this.type_detector(padded, i)));
			}

			// Close the line when the step has been passed and the current
			// character is not a Persian letter, or when the text is finished.
			if ((step_counter > step && this.is_none_persian(code)) || i >= last) {
				if (line_chars.length > 0) {
					line_chars.reverse();
					lines.push(line_chars.join(""));
					line_chars = new Array();
				}
				step_counter = 0;
			}
		}

		return lines.join("\n");
	}

	/*
		Gets the character's code and returns the glyph for the requested form
		if the character is listed in farsi_uni.
		Returns the character itself, unchanged, if it is not listed
		(non-Persian characters and punctuation marks).
	*/
	private function find_char(input:String, char_at:Number, char_code:Number, char_type:Number):String {
		var num:Number = this.index_of_code(char_code);
		if (num != -1) {
			return this.farsi_hex[num][char_type]; // The char is Persian
		}
		return input.charAt(char_at); // The char is not Persian
	}
}