This is a simple application that calculates word frequencies in a multibyte string, i.e. a Unicode (UTF-8) encoded string such as text written in Jawi or Arabic script.
The PHP code:
<html>
<head>
<title>Maklumat tulisan Arab</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<!-- The head stays open here on purpose: the script and stylesheet below belong
     inside it, and the closing head tag follows the inline script further down. -->
<script type="text/javascript" src="keyboard.js" charset="UTF-8"></script>
<link rel="stylesheet" type="text/css" href="keyboard.css">
<script type="text/javascript">
// The form field that most recently received focus (assigned by the input's
// onFocus handler); null until a field has been focused.
var currentFocussedElement = null;

/**
 * Appends the text shown by `sender` to the currently focused input.
 *
 * Does nothing when no input has been focused yet.
 *
 * @param {Element} sender Element whose text is inserted - presumably a key
 *                         of the on-screen keyboard (verify against callers).
 */
function enterLetter(sender)
{
    if (!currentFocussedElement) {
        return;
    }
    // textContent yields the decoded character(s); innerHTML would append
    // markup or entities such as "&amp;" into the input value.
    currentFocussedElement.value += sender.textContent;
}
</script>
</head>
<body>
<!-- Input form: posts the text to analyse to index.php in the "fname" field. -->
<form action="index.php" method="post">
<!-- On focus the field is stored in currentFocussedElement; the "keyboardInput"
     class is presumably the hook keyboard.js uses to attach the on-screen
     keyboard (TODO confirm against keyboard.js). -->
<input type="text" name="fname" size="50" tabindex="1" onFocus="currentFocussedElement = this" class="keyboardInput"/>
<input type="submit" />
</form>
<!-- Separator between the form and the word-frequency table rendered below. -->
<hr>
<?php
// Read the submitted text. On the initial GET request the field is absent, and a
// non-string value (e.g. fname[]=...) cannot be tokenized, so both fall back to ''.
$text = (isset($_POST["fname"]) && is_string($_POST["fname"])) ? $_POST["fname"] : '';
if ($text !== '') {
    // str_word_count($text, 1) would do if only ASCII mattered;
    // utf8_str_word_count() is used because the input may be any UTF-8 text (i18n).
    $words = utf8_str_word_count($text, 1);
    // Map each distinct word to the number of times it occurs.
    $frequency = array_count_values($words);
?>
<table border=1>
<tr> <td>perkataan </td><td>frekuensi </td></tr>
<?php
    foreach ($frequency as $word => $count) {
        // Escape on output so submitted text is never interpreted as markup.
        echo "<tr> <td>" . htmlspecialchars((string) $word, ENT_QUOTES, 'UTF-8') . " </td><td>" . $count . " </td></tr>";
    }
?>
</table>
<?php
}//end if: nothing is rendered when no text was submitted
?>
<?php
/**
 * Tokenizer: a UTF-8 aware counterpart of str_word_count().
 *
 * A word is a maximal run of Unicode letters (\p{L}), non-spacing marks (\p{Mn}),
 * dash punctuation (\p{Pd}), apostrophes (' and U+2019) and any character
 * listed in $charlist.
 *
 * @param string      $string   UTF-8 text to tokenize.
 * @param int         $format   0 returns the number of words; any other value
 *                              returns the list of words.
 * @param string|null $charlist Extra characters to treat as part of a word;
 *                              they are taken literally (quoted for the regex).
 *
 * @return int|array Word count when $format is 0, otherwise the words in
 *                   order of appearance (an empty array when there are none).
 */
function utf8_str_word_count($string, $format = 0, $charlist = null)
{
    // Passing null to preg_quote() is deprecated since PHP 8.1, so the default
    // is normalised to an empty addition to the character class.
    $extra = ($charlist === null) ? '' : preg_quote($charlist, '~');
    $pattern = '~[\p{L}\p{Mn}\p{Pd}\'\x{2019}' . $extra . ']+~u';

    $matches = array();
    $found = preg_match_all($pattern, $string, $matches);

    // When nothing matches, preg_match_all() still fills $matches with one empty
    // sub-array; counting or returning that directly would report a phantom word.
    $words = ($found > 0 && isset($matches[0])) ? $matches[0] : array();

    // Loose comparison kept on purpose so existing callers passing '0' still get a count.
    if ($format == 0) {
        return count($words);
    }

    return $words;
}
?>
Comments
Post a Comment