package com.mindprod.compactor;
import com.mindprod.commandline.CommandLine;
import com.mindprod.filter.AllButSVNDirectoriesFilter;
import com.mindprod.filter.ExtensionListFilter;
import com.mindprod.hunkio.HunkIO;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Pattern;
import static java.lang.System.err;
import static java.lang.System.out;
/**
* Compacts HTML by removing unnecessary white space.
* <p/>
* We always compact whitespace inside and outside comments.
* <p/>
* We don't consolidate tags. e.g.
* <span class="x">this </span><span class="x">and that</span> can be collapsed
* to <span class="x">this and that</span>.
* <p/>
* We don't convert tags to lower case e.g. <BR> to <br>
* <p/>
* We leave all comments in place. If ever such a feature is implemented, it must
* not strip SSI comments. It may or may not leave macro comments.
* <p/>
* We do not remove macro generations. You can do that with StripGenerated.
* <p/>
* We do not remove the macro comments.
* <p/>
* We remove space and NLs on the right of <div><dt><li><h?><ol><table><tbody><td><th><thead><tr><ul> tags.
* <p/>
* We remove space and NLs on the left of </div></dt></li></h?></ol></table></tbody></td></th></thead></tr></ul> tags.
* <p/>
* We always remove lead and trailing spaces from lines.
* <p/>
* We compact spaces inside HTML text, tags and comments.
* <p/>
* We leave spaces as is inside <pre>...</pre> and inside quoted tag parameters.
* <p/>
* We convert " to &quot; > to &gt; when used in raw text.
* <p/>
* We don't tokenize to convert to CBF, compact binary format. The catch here is web
* browsers can't read the result without a plug-in. This would result in a major
* compaction. Perhaps the XML folk will eventually get disgusted with their obese
* format and XHTML can inherit a now compact form.
* <p/>
* We don't do any LZW compression. The catch is, browsers can't read this without a
* special plug-in.
*
* @author Roedy Green, Canadian Mind Products
* @version 3.3 2011-11-15 add compactStringAsNeeded
* @since 2006
*/
public class Compactor
{
    /**
     * undisplayed copyright notice
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String EMBEDDED_COPYRIGHT =
            "Copyright: (c) 1999-2012 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * date this version was released.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    private static final String RELEASE_DATE = "2011-11-15";

    /**
     * how to use the command line; appended to the exception thrown by main when no files are found.
     */
    private static final String USAGE = "Compactor needs a filename.html or a space-separated list of filenames, with optional -s -q -v switches.";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String VERSION_STRING = "3.3";

    /**
     * pattern for the text of a "generated" comment: optional whitespace, the word generated, one whitespace char.
     */
    private static final Pattern GENERATED_PATTERN = Pattern.compile( "\\s*generated\\s" );

    /**
     * pattern for the text of a "macro" comment: optional whitespace, the word macro, one whitespace char.
     */
    private static final Pattern MACRO_PATTERN = Pattern.compile( "\\s*macro\\s" );

    /**
     * pattern for the text of a "/generated" comment: optional whitespace, /generated, one whitespace char.
     */
    private static final Pattern SLASH_GENERATED_PATTERN = Pattern.compile( "\\s*/generated\\s" );

    /**
     * pattern for the text of an SSI comment: a single # character.
     */
    private static final Pattern SSI_PATTERN = Pattern.compile( "#" );

    /**
     * compact and tidy one file, replacing it in place if compaction changed anything.
     * <p/>
     * Files whose name does not end in .html or .htm are rejected with a message on System.err.
     * Otherwise the file is read in its entirety, compacted with
     * {@link #compactStringKeepingMacrosAndComments}, and, only if the result is a different String
     * object, written to a temporary file which then replaces the original.
     *
     * @param quiet              true if want progress messages suppressed
     * @param fileBeingProcessed the file currently being processed.
     *
     * @throws IOException if the file cannot be read or written, the old file cannot be deleted,
     *                     or the temporary file cannot be renamed to the old name.
     *                     Suppress IntelliJ Code Analyse that wants to make this private.
     * @noinspection WeakerAccess, SameParameterValue, StringEquality
     */
    public static void compactFile( boolean quiet, File fileBeingProcessed ) throws IOException
    {
        final String name = fileBeingProcessed.getName();
        if ( !quiet )
        {
            out.print( " compacting " + name + " " );
        }
        if ( !( name.endsWith( ".html" ) || name.endsWith( ".htm" ) ) )
        {
            // leading space keeps the file name and the explanation from running together.
            err.println( "Cannot compact: "
                         + name
                         + " not .html file" );
            return;
        }
        final String big = HunkIO.readEntireFile( fileBeingProcessed );
        final String result = compactStringKeepingMacrosAndComments( big, fileBeingProcessed.getPath() );
        // Deliberate identity comparison: the compactString* methods are documented to return
        // big itself when nothing changed, so == is a cheap "unchanged" test.
        if ( result == big )
        {
            if ( !quiet )
            {
                out.println( "-" );
            }
            return;
        }
        if ( !quiet )
        {
            out.println( "*" );
        }
        // NOTE(review): presumably HunkIO.createTempFile places the temp file beside
        // fileBeingProcessed so that the rename below stays on one volume -- confirm.
        final File tempFile = HunkIO.createTempFile( "temp", ".tmp", fileBeingProcessed );
        final FileWriter emit = new FileWriter( tempFile );
        try
        {
            emit.write( result );
        }
        finally
        {
            // close even when write fails, so the file handle is never leaked.
            emit.close();
        }
        // The original is deleted only after the compacted copy has been completely written and closed.
        if ( !fileBeingProcessed.delete() )
        {
            throw new IOException( "Unable to delete the old file " + fileBeingProcessed.getAbsolutePath() );
        }
        if ( !tempFile.renameTo( fileBeingProcessed ) )
        {
            throw new IOException( "Unable to rename the output to the old file name " + fileBeingProcessed.getAbsolutePath() );
        }
    }

    /**
     * compact a String as needed, choosing the strategy with a one-character code.
     *
     * @param uncompacted uncompacted string
     * @param where       where this string came from, used in error messages to help you track down source
     * @param how         {@code '*'} = compactStringStrippingMacrosAndComments,
     *                    {@code '+'} = compactStringKeepingMacrosAndComments,
     *                    {@code '-'} = does nothing,
     *                    {@code 'Q'} or {@code 'q'} = Quick: if the first 400 chars contain a double space,
     *                    compactStringKeepingMacrosAndComments, otherwise do nothing.
     *                    Any other value trips an assertion when assertions are enabled,
     *                    and otherwise does nothing.
     *
     * @return compacted String, or uncompacted itself when nothing was done.
     */
    public static String compactStringAsNeeded( final String uncompacted, final String where, final char how )
    {
        switch ( how )
        {
            case '*':
                return Compactor.compactStringStrippingMacrosAndComments( uncompacted, where );
            case '+':
                return Compactor.compactStringKeepingMacrosAndComments( uncompacted, where );
            case '-':
                return uncompacted;
            case 'Q':
            case 'q':
                // Only sample the head of the string; a double space there is taken as a sign
                // that the text has not been compacted yet.
                final String test = ( uncompacted.length() < 400 ) ? uncompacted : uncompacted.substring( 0, 400 );
                if ( test.contains( " " + " " ) )
                {
                    return Compactor.compactStringKeepingMacrosAndComments( uncompacted, where );
                }
                else
                {
                    return uncompacted;
                }
            default:
                assert false : "invalid Compactor.compactStringAsNeeded.how " + how + " It must be one of * + - Q";
                return uncompacted;
        }
    }

    /**
     * Remove excess whitespace from HTML represented by string, keeping macros and comments.
     * <p/>
     * Delegates to HTMLState.compactString with a third argument of true and the macro and
     * /generated comment patterns.
     * NOTE(review): the third argument is presumably a keep-comments flag and the patterns
     * presumably select comments to retain; GENERATED_PATTERN is not passed here -- confirm
     * against HTMLState.compactString.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringKeepingMacrosAndComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, true,
                MACRO_PATTERN,
                SLASH_GENERATED_PATTERN );
    }

    /**
     * Remove excess whitespace from HTML represented by string, keeping macros but stripping comments.
     * <p/>
     * Delegates to HTMLState.compactString with a third argument of false and the macro, SSI,
     * generated and /generated comment patterns.
     * NOTE(review): the patterns presumably name the comments that survive stripping -- confirm
     * against HTMLState.compactString.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringKeepingMacrosStrippingComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, false,
                MACRO_PATTERN,
                SSI_PATTERN,
                GENERATED_PATTERN,
                SLASH_GENERATED_PATTERN );
    }

    /**
     * Remove excess whitespace from HTML represented by string, strip all macros and comments.
     * <p/>
     * Delegates to HTMLState.compactString with a third argument of false and only the SSI
     * comment pattern.
     * NOTE(review): presumably this means SSI comments alone are retained -- confirm against
     * HTMLState.compactString.
     *
     * @param big   the String to compact.
     * @param where used in error messages to indicate where the error occurred, usually the name of the file being
     *              compacted.
     *
     * @return the compacted String, big itself if nothing changed.
     */
    public static String compactStringStrippingMacrosAndComments( final String big, final String where )
    {
        return HTMLState.compactString( big, where, false,
                SSI_PATTERN
        );
    }

    /**
     * constructor. Does nothing; every method of this class is static.
     */
    public Compactor()
    {
    }

    /**
     * compacts HTML files.
     * <p/>
     * Each file yielded by the command line is compacted in turn. A failure on one file is
     * reported on System.err and does not stop the remaining files from being processed.
     *
     * @param args names of files to process, dirs, files, -s, *.*, no wildcards.
     *
     * @throws IllegalArgumentException if the command line yields no files to process.
     */
    public static void main( String[] args )
    {
        out.println( "Gathering html files to compact..." );
        final CommandLine commandLine = new CommandLine( args,
                new AllButSVNDirectoriesFilter(),
                new ExtensionListFilter( "html" ) );
        final boolean quiet = commandLine.isQuiet();
        if ( commandLine.size() == 0 )
        {
            throw new IllegalArgumentException( "No files found to process\n" + USAGE );
        }
        for ( File file : commandLine )
        {
            try
            {
                compactFile( quiet, file );
            }
            catch ( FileNotFoundException e )
            {
                err.println( "Error: "
                             + file.getAbsolutePath()
                             + " not found." );
            }
            catch ( Exception e )
            {
                // top-level boundary: report the problem and carry on with the next file.
                err.println();
                e.printStackTrace( err );
                err.println( " in file "
                             + file.getAbsolutePath() );
                err.println();
            }
        }
    }
}