<?php
/*
* $Id: java2html.inc,v 1.0.6 2003/11/29 15:36:54 manolov Exp $
*
* Java2HTML converter
* converts a java source code into HTML with syntax highlighting
*
* Copyright (C) 2003 Borislav Manolov
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* author: Borislav Manolov
* e-mail: b.manolov at web.de
* home page: http://purl.oclc.org/NET/manolov/
*/
// placeholders that stand in for extracted javadoc comments, normal
// comments, strings and chars while the rest of the code is highlighted;
// java2html() appends the index of the extracted text to them
define('JDOC_COMMENT', '°j');
define('COMMENT', '°c');
define('STRING', '°s');
define('CHAR', '°h');

// names for the style classes
define('LINES_CLASS', 'ln');
define('KEYWORD_CLASS', 'key');
define('JDOC_CLASS', 'jdoc');
define('COMMENT_CLASS', 'comm');
define('STRING_CLASS', 'str');
define('CHAR_CLASS', 'ch');
define('BRACES_CLASS', 'b1');
define('PARENTHESES_CLASS', 'b2');
define('SQUARE_BRACKETS_CLASS', 'b3');
define('NUMBERS_CLASS', 'num');
define('OPERATORS_CLASS', 'op');

// the text every tab character is replaced with, default = 2 spaces
// (the value was a single space, which contradicted the documented default)
define('TAB_SIZE', '  ');
/*
 * main function
 *
 * Converts a java source file into HTML with syntax highlighting:
 *   1. comments, strings and chars are cut out of the text and replaced
 *      by numbered placeholders,
 *   2. keywords, brackets, numbers and operators are wrapped in tags,
 *   3. the placeholders are swapped back through the *_callback functions.
 *
 * Known limitation: a block comment that follows a string containing '//'
 * on the same line is not recognised as a comment.
 *
 * @param $filename : string - source file; read with gzfile() when the
 *                    name contains '.gz', with file() otherwise
 * @param $put_line_nums : boolean - enable line numbers
 * @return : string - the highlighted source, '' if the file cannot be read
 */
function java2html($filename, $put_line_nums = true) {
    global $jdoccomments, $comments, $strings, $chars,
           $spaces, $lcount, $next_boundary;

    $keywords_java = array(
        'class', 'abstract', 'assert', 'boolean', 'break', 'byte', 'case',
        'catch', 'char', 'const', 'continue', 'default', 'do', 'double',
        'else', 'extends', 'final', 'finally', 'float', 'for', 'if',
        'implements', 'import', 'instanceof', 'int', 'interface', 'long',
        'native', 'new', 'package', 'private', 'protected', 'public',
        'return', 'short', 'static', 'strictfp', 'super', 'switch',
        'synchronized', 'this', 'throw', 'throws', 'transient', 'try',
        'void', 'volatile', 'while', 'goto', 'null', 'true', 'false');

    // The buffers are shared with the callbacks through globals. Empty them
    // so that a second call does not resolve its placeholders against the
    // literals of the previously converted file.
    $jdoccomments = array();
    $comments = array();
    $strings = array();
    $chars = array();

    // strict comparison: a match at offset 0 must not count as "not found"
    if (strpos($filename, '.gz') !== false) {
        $arr = gzfile($filename);
    } else {
        $arr = file($filename);
    }
    if ($arr === false) {
        // file()/gzfile() have already raised a warning
        return '';
    }

    // Make the text HTML-safe ('&' first, so the entities produced for
    // '>' and '<' are not escaped again), expand tabs and normalise line
    // ends. str_replace() works on bytes, so any file encoding is accepted.
    // The extra "\n" guarantees that every line is terminated.
    $buf = str_replace(
        array('&', '>', '<', "\t", "\r\n", "\r"),
        array('&amp;', '&gt;', '&lt;', TAB_SIZE, "\n", "\n"),
        implode('', $arr)) . "\n";

    // extract javadoc comments
    $pos = 0;
    while (($pos = strpos($buf, '/**', $pos)) !== false) {
        if (java2html_is_masked($buf, $pos, true)) {
            $pos += 3;
            continue;
        }
        $end = strpos($buf, '*/', $pos + 2);
        // an unterminated comment runs to the end of the text
        $length = ($end === false) ? strlen($buf) - $pos : $end + 2 - $pos;
        $pos = java2html_extract($buf, $pos, $length,
                                 JDOC_COMMENT, $jdoccomments);
    }

    // extract normal multiline comments
    $pos = 0;
    while (($pos = strpos($buf, '/*', $pos)) !== false) {
        if (java2html_is_masked($buf, $pos, true)) {
            $pos += 2;
            continue;
        }
        $end = strpos($buf, '*/', $pos + 2);
        $length = ($end === false) ? strlen($buf) - $pos : $end + 2 - $pos;
        $pos = java2html_extract($buf, $pos, $length, COMMENT, $comments);
    }

    // extract one-line comments (they share $comments with the multiline
    // ones, so the numbering simply continues)
    $pos = 0;
    while (($pos = strpos($buf, '//', $pos)) !== false) {
        if (java2html_is_masked($buf, $pos, false)) {
            $pos += 2;
            continue;
        }
        $end = strpos($buf, "\n", $pos);
        $length = ($end === false) ? strlen($buf) - $pos : $end - $pos;
        $pos = java2html_extract($buf, $pos, $length, COMMENT, $comments);
    }

    // extract strings
    $pos = 0;
    while (($pos = strpos($buf, '"', $pos)) !== false) {
        // the quote of the char literals '"' and '\"' does not open a string
        if (($pos > 0 && $buf[$pos - 1] === '\'')
            || ($pos > 1 && $buf[$pos - 1] === '\\'
                && $buf[$pos - 2] === '\'')) {
            $pos++;
            continue;
        }
        $length = java2html_literal_length($buf, $pos, '"');
        $pos = java2html_extract($buf, $pos, $length, STRING, $strings);
    }

    // extract chars
    $pos = 0;
    while (($pos = strpos($buf, '\'', $pos)) !== false) {
        $length = java2html_literal_length($buf, $pos, '\'');
        $pos = java2html_extract($buf, $pos, $length, CHAR, $chars);
    }

    // highlight keywords; one pass over the text, so the word 'class'
    // inside the inserted tags is never taken for a keyword
    $buf = preg_replace(
        '/\b(?:' . implode('|', $keywords_java) . ')\b/',
        '<span class="' . KEYWORD_CLASS . '">$0</span>',
        $buf);

    // highlight all brackets
    $buf = strtr($buf, array(
        '{' => '<span class="' . BRACES_CLASS . '">{</span>',
        '}' => '<span class="' . BRACES_CLASS . '">}</span>',
        '(' => '<span class="' . PARENTHESES_CLASS . '">(</span>',
        ')' => '<span class="' . PARENTHESES_CLASS . '">)</span>',
        '[' => '<span class="' . SQUARE_BRACKETS_CLASS . '">[</span>',
        ']' => '<span class="' . SQUARE_BRACKETS_CLASS . '">]</span>'
    ));

    // highlight numbers; the longer forms come first so that '1.5' or
    // '1e5' is not cut short by the plain decimal pattern (which also
    // covers octal literals)
    $hex_pattern = '\b0[xX][\da-fA-F]+[LlFf]?\b';
    $float_pattern = '\b\d*\.\d+(?:[Ee][-+]?\d+)?[FfDd]?\b'
                   . '|\b\d+\.\d*(?:[Ee][-+]?\d+)?[FfDd]?\b';
    $scientific_pattern = '\b\d+[Ee][-+]?\d+[FfDd]?\b';
    $dec_pattern = '\b\d+[LlFfDd]?\b';
    $buf = preg_replace(
        '/' . $hex_pattern . '|' . $float_pattern . '|'
            . $scientific_pattern . '|' . $dec_pattern . '/',
        '<span class="' . NUMBERS_CLASS . '">$0</span>',
        $buf);

    // highlight operators in a single pass; the first group matches the
    // markup inserted above (a whole number literal, or any other tag) and
    // is passed through untouched, the second group matches the operators
    // ('<', '>' and '&' are entities at this point)
    $operator_pattern = '@'
        . '(<span class="' . preg_quote(NUMBERS_CLASS, '@') . '">[^<]*</span>'
        . '|<[^>]*>)'
        . '|((?:&gt;){1,3}=?'
        . '|(?:&lt;){1,2}=?'
        . '|&amp;&amp;|&amp;=?'
        . '|\|\||\+\+|--'
        . '|[|!^+\-*/%=]='
        . '|[|!~^+\-*/%?:]'
        . '|(?<=\w)\.(?=\w))'
        . '@';
    $buf = preg_replace_callback($operator_pattern,
                                 'java2html_operator_callback', $buf);

    // add javadoc comments
    $buf = preg_replace_callback(
        '/' . preg_quote(JDOC_COMMENT, '/') . '(\d+)/',
        'jdoccomments_callback', $buf);
    // add normal comments
    $buf = preg_replace_callback(
        '/' . preg_quote(COMMENT, '/') . '(\d+)/',
        'comments_callback', $buf);
    // add strings
    $buf = preg_replace_callback(
        '/' . preg_quote(STRING, '/') . '(\d+)/',
        'strings_callback', $buf);
    // add chars
    $buf = preg_replace_callback(
        '/' . preg_quote(CHAR, '/') . '(\d+)/',
        'chars_callback', $buf);

    // add lines numbers
    if ($put_line_nums) {
        // only trailing whitespace is dropped: removing leading blank
        // lines would shift every line number against the source file
        $buf = rtrim($buf);
        $linesCount = substr_count($buf, "\n") + 1;
        // spaces added in front of the line number
        $spaces = str_repeat(' ', strlen((string) $linesCount) - 1);
        $lcount = 1;
        $next_boundary = 10;
        $buf = '<tt class="' . LINES_CLASS . '">' . $spaces . '1</tt> ' . $buf;
        $buf = preg_replace_callback("/\n/", 'lines_callback', $buf);
        $buf .= "\n";
    }
    return $buf;
}

/*
 * helper of java2html: tells whether the token at $pos is not real code
 * @param $buf : string - the text
 * @param $pos : int - position of the first character of the token
 * @param $check_line_comment : boolean - also look for '//' in front
 * @return : boolean - true if an odd number of quotes precedes the token on
 *           its line (it is inside a string), or, when requested, if '//'
 *           precedes it (it is inside a one-line comment)
 */
function java2html_is_masked($buf, $pos, $check_line_comment) {
    $line_start = $pos;
    while ($line_start > 0 && $buf[$line_start - 1] !== "\n") {
        $line_start--;
    }
    // The first character of the token is included, so that '//*' is
    // recognised as a one-line comment.
    $prefix = substr($buf, $line_start, $pos - $line_start + 1);
    if (countQuotes($prefix) % 2 != 0) {
        return true;
    }
    return $check_line_comment && strpos($prefix, '//') !== false;
}

/*
 * helper of java2html: measures the string or char literal opened at $open
 * @param $buf : string - the text
 * @param $open : int - position of the opening quote
 * @param $quote : string - the delimiter, '"' or '\''
 * @return : int - length of the literal including both delimiters; if no
 *           closing delimiter exists, the length up to the end of the line
 */
function java2html_literal_length($buf, $open, $quote) {
    $end = $open;
    while (($end = strpos($buf, $quote, $end + 1)) !== false) {
        // a delimiter is escaped if an odd number of backslashes precedes it
        $backslashes = 0;
        for ($i = $end - 1; $i > $open && $buf[$i] === '\\'; $i--) {
            $backslashes++;
        }
        if ($backslashes % 2 == 0) {
            return $end + 1 - $open;
        }
    }
    $eol = strpos($buf, "\n", $open);
    return ($eol === false ? strlen($buf) : $eol) - $open;
}

/*
 * helper of java2html: moves a piece of the text into a buffer and leaves
 * a numbered placeholder behind
 * @param $buf : string - the text, modified in place
 * @param $start : int - first position of the piece
 * @param $length : int - length of the piece
 * @param $marker : string - placeholder prefix, the index is appended
 * @param $store : array - buffer that receives the piece
 * @return : int - position right behind the inserted placeholder
 */
function java2html_extract(&$buf, $start, $length, $marker, &$store) {
    $placeholder = $marker . count($store);
    $store[] = substr($buf, $start, $length);
    $buf = substr_replace($buf, $placeholder, $start, $length);
    return $start + strlen($placeholder);
}

/*
 * helper of java2html: callback of the operator pass
 * @param $matches : array - [1] = markup to keep as it is, [2] = operator
 * @return : string - the operator wrapped in a tag, or the markup unchanged
 */
function java2html_operator_callback($matches) {
    if (isset($matches[2]) && $matches[2] !== '') {
        return '<b class="' . OPERATORS_CLASS . '">' . $matches[2] . '</b>';
    }
    return $matches[0];
}
/*
 * counts the non-escaped quotes in a string
 * (" " " \" ") -> 4
 *
 * A quote is escaped when an odd number of backslashes stands directly in
 * front of it. The scan for backslashes stops at the start of the string,
 * so the first characters are never compared against characters taken
 * from the end of the string through a negative offset.
 *
 * @param $str : string - the text to examine
 * @return : int - number of double quotes that are not escaped
 */
function countQuotes($str) {
    $count = 0;
    $pos = 0;
    while (($pos = strpos($str, '"', $pos)) !== false) {
        $backslashes = 0;
        for ($i = $pos - 1; $i >= 0 && $str[$i] === '\\'; $i--) {
            $backslashes++;
        }
        if ($backslashes % 2 == 0) {
            $count++;
        }
        $pos++;
    }
    return $count;
}
// state of the line numbering, shared with java2html() through globals:
// padding in front of the number, number of the current line and the
// line number at which the padding loses one character
$spaces = '';
$lcount = 1;
$next_boundary = 10;

/*
 * Callback for the newline replacement: produces the newline followed by
 * the tag with the next line number.
 * @param $matches : array - the matched newline (not used)
 * @return : string - "\n" plus the line number wrapped in a LINES_CLASS tag
 */
function lines_callback($matches) {
    global $spaces, $lcount, $next_boundary;

    $lcount += 1;
    // the number has gained a digit: shorten the padding by one character
    if ($lcount == $next_boundary) {
        $next_boundary = $next_boundary * 10;
        $spaces = substr($spaces, 1);
    }

    $opening = '<tt class="' . LINES_CLASS . '">';
    return "\n" . $opening . $spaces . $lcount . '</tt> ';
}
// global buffers, all initially empty: java2html() stores the extracted
// javadoc comments, normal comments, strings and chars in them and the
// *_callback functions read them back by index
$jdoccomments = $comments = $strings = $chars = array();
// callback functions used by java2html function
/*
 * Swaps a javadoc placeholder back for the stored comment.
 * @param $matches : array - [1] is the index into the global $jdoccomments
 * @return : string - the comment wrapped in a JDOC_CLASS span
 */
function jdoccomments_callback($matches) {
    global $jdoccomments;

    $index = $matches[1];
    $text = $jdoccomments[$index];
    return sprintf('<span class="%s">%s</span>', JDOC_CLASS, $text);
}
/*
 * Swaps a comment placeholder back for the stored comment.
 * @param $matches : array - [1] is the index into the global $comments
 * @return : string - the comment wrapped in a COMMENT_CLASS span
 */
function comments_callback($matches) {
    global $comments;

    $opening = '<span class="' . COMMENT_CLASS . '">';
    $closing = '</span>';
    return $opening . $comments[$matches[1]] . $closing;
}
/*
 * Swaps a string placeholder back for the stored string literal.
 * The double quotes of the literal are written as &quot; entities; the
 * former replacement of '"' by '"' changed nothing.
 * @param $matches : array - [1] is the index into the global $strings
 * @return : string - the literal wrapped in a STRING_CLASS span
 */
function strings_callback($matches) {
    global $strings;
    return '<span class="'. STRING_CLASS .'">'.
        str_replace('"', '&quot;', $strings[$matches[1]]) . '</span>';
}
/*
 * Swaps a char placeholder back for the stored char literal.
 * @param $matches : array - [1] is the index into the global $chars
 * @return : string - the literal wrapped in a CHAR_CLASS span
 */
function chars_callback($matches) {
    global $chars;

    $parts = array('<span class="', CHAR_CLASS, '">', $chars[$matches[1]], '</span>');
    return implode('', $parts);
}
?>