Calculating word frequency

This is a simple application that calculates word frequency in a multibyte string. A multibyte string here means Unicode (UTF-8) text, such as that used for the Jawi or Arabic scripts.

calculate word frequency in a MultiByte String

The PHP code:


<html>
<head>
<title>Maklumat tulisan Arab</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<script type="text/javascript" src="keyboard.js" charset="UTF-8"></script>
<link rel="stylesheet" type="text/css" href="keyboard.css">
<script type="text/javascript">

var currentFocussedElement = null;

// Appends the markup content of the clicked element (`sender`) to the value
// of the input that most recently received focus. Does nothing when no input
// has been focused yet.
function enterLetter(sender)
{
    if (!currentFocussedElement) {
        return;
    }
    var target = currentFocussedElement;
    target.value = target.value + sender.innerHTML;
}

</script>
</head>
<body>


<form action="index.php" method="post">

<input type="text" name="fname" size="50" tabindex="1" onFocus="currentFocussedElement = this" class="keyboardInput"/>
<input type="submit" />
</form>

<hr>

<?php
// Read the submitted text. On the initial GET request the field is absent,
// so fall back to an empty string instead of triggering an undefined-index
// notice.
$text = isset($_POST["fname"]) ? $_POST["fname"] : '';

// Only tokenize real, non-empty strings (a crafted request could post an array).
if (is_string($text) && $text !== '') {
    // str_word_count($text, 1) would only handle ASCII; the Unicode-aware
    // tokenizer is required for Jawi/Arabic input.
    $words = utf8_str_word_count($text, 1);

    // Map each distinct word to the number of times it occurs.
    $frequency = array_count_values($words);

?>
<table border=1>
<tr> <td>perkataan </td><td>frekuensi </td></tr>
<?php
    foreach ($frequency as $word => $count) {
        // Escape the word before writing it into the page: it comes from user input.
        echo "<tr> <td>" . htmlspecialchars((string) $word, ENT_QUOTES, 'UTF-8') . " </td><td>" . $count . " </td></tr>";
    }
?>
</table>
<?php
}//end if text was submitted
?>

<?php
/**
 * Unicode-aware counterpart of str_word_count() for UTF-8 text.
 *
 * A word is a maximal run of letters (\p{L}), non-spacing marks (\p{Mn}),
 * dash punctuation (\p{Pd}), the ASCII apostrophe, the right single
 * quotation mark (U+2019), and any extra characters listed in $charlist.
 *
 * @param string      $string   UTF-8 text to tokenize.
 * @param int         $format   0 returns the number of words; any other value
 *                              returns the list of words in order of appearance.
 * @param string|null $charlist Additional characters to treat as part of a word.
 *
 * @return int|array Word count, or list of words, depending on $format.
 */
function utf8_str_word_count($string, $format = 0, $charlist = null)
{
    // Cast so that the default null does not reach preg_quote() (deprecated since PHP 8.1).
    $extra = preg_quote((string) $charlist, '~');
    $pattern = '~[\p{L}\p{Mn}\p{Pd}\'\x{2019}' . $extra . ']+~u';

    $matches = array();
    $found = preg_match_all($pattern, $string, $matches);

    // With no match preg_match_all() still fills $matches with array(0 => array()),
    // and on error it returns false; in both cases report an empty word list
    // rather than leaking the nested structure to the caller.
    $words = ($found > 0 && isset($matches[0])) ? $matches[0] : array();

    if ($format == 0) {
        return count($words);
    }

    return $words;
}
?>



Popular Posts