/*
 * [Http.java]
 *
 * Summary: Base class to Post, Get, Head, Probe and Chase to send/receive HTTP messages.
 *
 * Copyright: (c) 1998-2012 Roedy Green, Canadian Mind Products, http://mindprod.com
 *
 * Licence: This software may be copied and used freely for any purpose but military.
 *          http://mindprod.com/contact/nonmil.html
 *
 * Requires: JDK 1.5+
 *
 * Created with: JetBrains IntelliJ IDEA IDE http://www.jetbrains.com/idea/
 *
 * Version History:
 *  1.0 1998-01-01 initial version
 *  1.1 2007-07-19 improved handling of responseCode
 *  1.2 2007-07-27 use UTF-8 instead of 8859_1.
 *  1.3 2007-08-24 readStringBlocking, readBytesBlocking, encoding on Get
 *  1.4 2007-09-26 add TIMEOUT
 *  1.5 2007-12-30 add alternate get and post methods that take a full URL.
 *  1.6 2008-01-14 add gzip option on read
 *  1.7 2008-07-25 add configurable User-Agent, add Base Http class.
 *  1.8 2008-07-27 handle case where URL given was not HTTP
 *  1.9 2008-08-22 support accept-charset, accept-encoding and accept-language. Fix bugs in gzip support.
 *  2.0 2009-02-20 major refactoring. separate setParms and setPostParms. new send method. Post can have both types of parm.
 *  2.1 2010-02-07 new methods Post.setBody Http.setRequestProperties.
 *  2.2 2010-04-05 new method getURL
 *  2.3 2010-11-14 new method setInstanceFollowRedirects
 *  2.4 2011-02-03 change documentation to reflect that the HTTP package handled both http: and https: equally well.
 *  2.5 2011-04-01 allow gzip compression. Update User agent.
 *  2.6 2011-05-01 getRawResponseMessage and getResponseMessage(uses standard wordings).
 *  2.7 2011-05-19 change all encoding parms from String to Charset type for tighter parameter checking.
 *  2.8 2011-08-30 update User Agent
 *  2.9 2011-11-09 add configuring getter/setters for Accept-Property, Accept-Charset etc.
 */
package com.mindprod.http;

import com.mindprod.common15.STA;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static java.lang.System.err;
import static java.lang.System.out;

/**
 * Base class to Post, Get, Head, Probe and Chase to send/receive HTTP messages.
 * <p/>
 * Originally based on work by Jonathan Revusky
 *
 * @author Roedy Green, Canadian Mind Products
 * @version 2.9 2011-11-09 add configuring getter/setters for Accept-Property, Accept-Charset etc.
 * @since 1998-01-01
 */
abstract class Http
    {
    // ------------------------------ CONSTANTS ------------------------------

    /**
     * true if want extra debugging output.  If you change this to true, make sure you set it back to false before
     * distributing http or any package that uses it.
     */
    static final boolean DEBUGGING = false;

    /**
     * message length to presume when no length given
     */
    static final int DEFAULT_LENGTH = 32 * 1024;

    /**
     * responseCode to give if there is no proper one
     */
    static final int DEFAULT_RESPONSE_CODE = -1;

    /**
     * responseMessage to give if there is no proper one. Might mean for example that you tried to use http: on an https: URL.
     */
    static final String DEFAULT_RESPONSE_MESSAGE = "no connect";

    /**
     * undisplayed copyright notice
     */
    public static final String EMBEDDED_COPYRIGHT =
            "Copyright: (c) 1998-2012 Roedy Green, Canadian Mind Products, http://mindprod.com";

    /**
     * when package released.
     *
     * @noinspection UnusedDeclaration
     */
    private static final String RELEASE_DATE = "2011-11-09";

    /**
     * embedded version string.
     */
    @SuppressWarnings( { "UnusedDeclaration" } )
    public static final String VERSION_STRING = "2.9";

    /**
     * used to convert responseCode to responseMessage.
     */
    private static final String[] responseCodeLookup;

    /**
     * encoding for UTF-8
     */
    public static final Charset UTF8Charset = Charset.forName( "UTF-8" );

    // ------------------------------ FIELDS ------------------------------

    /**
     * parameters we send with the command. c.f. PostParms sent in message body with a post
     */
    private String[] parms;

    /**
     * additional request properties for the connection, pairs key, value
     */
    private String[] requestProperties = new String[ 0 ];

    /**
     * Accept-Charset for header
     */
    private String acceptCharset = "iso-8859-1,utf-8,utf-16;q=0.7,*;q=0.3"; // give preference to charsets listed

    /**
     * Accept-Encoding for header, when debugging avoid gzip to make Wireshark sniffing easier. We don't handle deflate.
     * Deflate is one of the PKZip compressors.
     * Use setAcceptEncoding to override this default.
     */
    private String acceptEncoding = DEBUGGING ? "identity" : "gzip,x-gzip,identity";

    /**
     * Accept property for header : application/xhtml+xml,application/xml
     */
    private String acceptProperty = "application/octet-stream," +
                                    "application/x-java-jnlp-file," +
                                    "application/x-java-serialized-object," +
                                    "application/xhtml+xml," +
                                    "application/xml," +
                                    "application/zip," +
                                    "image/gif," +
                                    "image/jpeg," +
                                    "image/png," +
                                    "text/css," +
                                    "text/html," +
                                    "text/plain," +
                                    "text/x-java-source," +
                                    "text/xml" +
                                    ";q=0.9,*/*;q=0.8";   // give pref to mimes listed
    // possibly add image/webp, image/x-xbitmap

    /**
     * the page containing the URL we pretend to be.
     * By default null, for none.
     */
    private String referer = null;

    /**
     * responseCode in words from most recent post
     */
    String responseMessage;

    /**
     * the browser we pretend to be, by default Firefox 8.0.1, can be overridden with setter.
     *
     * @see <a href="http://mindprod.com/jgloss/http.html">details on User-Agent</a>
     */
    private String userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0.1) Gecko/20100101 Firefox/8.0.1";

    /**
     * URL, including encoded get Parameters.
     */
    URL url;

    /**
     * true=auto follow redirects, false=treat redirect as error, just read the first-leg redirect message.
     */
    private boolean followRedirects = true;

    /**
     * Allow 50 seconds to connect, measured in millis.
     */
    private int connectTimeout = 50 * 1000;

    /**
     * Allow 40 seconds for a read to go without progress, measured in millis.
     */
    private int readTimeout = 40 * 1000;

    /**
     * responseCode from most recent post
     */
    int responseCode;

    // -------------------------- PUBLIC INSTANCE  METHODS --------------------------

    /**
     * Get the value that will be sent in the Accept-Charset request header.
     *
     * @return current Accept-Charset value, e.g. "iso-8859-1,utf-8,utf-16;q=0.7,*;q=0.3"
     */
    public String getAcceptCharset()
        {
        return this.acceptCharset;
        }

    /**
     * Change the value sent in the Accept-Charset request header.
     *
     * @param acceptCharset new Accept-Charset value, e.g. "iso-8859-1,utf-8,utf-16;q=0.7,*;q=0.3"
     */
    public void setAcceptCharset( final String acceptCharset )
        {
        this.acceptCharset = acceptCharset;
        }

    /**
     * Get the value that will be sent in the Accept-Encoding request header.
     *
     * @return current Accept-Encoding value, e.g. "gzip,x-gzip,identity"
     */
    public String getAcceptEncoding()
        {
        return this.acceptEncoding;
        }

    /**
     * Change the value sent in the Accept-Encoding request header.
     *
     * @param acceptEncoding new Accept-Encoding value, e.g. "gzip,x-gzip,identity"
     */
    public void setAcceptEncoding( final String acceptEncoding )
        {
        this.acceptEncoding = acceptEncoding;
        }

    /**
     * Get the value that will be sent in the Accept request header, i.e. the acceptable MIME types.
     *
     * @return current Accept value, e.g. "text/html,text/plain;q=0.9,*&#47;*;q=0.8"
     */
    public String getAcceptProperty()
        {
        return this.acceptProperty;
        }

    /**
     * Change the value sent in the Accept request header, i.e. the acceptable MIME types.
     *
     * @param acceptProperty new Accept value, e.g. "text/html,text/plain;q=0.9,*&#47;*;q=0.8"
     */
    public void setAcceptProperty( final String acceptProperty )
        {
        this.acceptProperty = acceptProperty;
        }

    /**
     * Get the time allowed for establishing a connection.
     *
     * @return connect timeout in milliseconds.
     */
    public int getConnectTimeout()
        {
        return this.connectTimeout;
        }

    /**
     * Override the default connect timeout of 50 seconds.
     *
     * @param connectTimeout time allowed to connect, in milliseconds. Note int, not long.
     */
    public void setConnectTimeout( int connectTimeout )
        {
        this.connectTimeout = connectTimeout;
        }

    /**
     * Get the response message recorded for the most recent post/get, exactly as stored,
     * without tidying it to the standard wording.
     *
     * @return raw response message
     * @see #getResponseMessage
     */
    public String getRawResponseMessage()
        {
        return this.responseMessage;
        }

    /**
     * Get the time a read is allowed to go without progress.
     *
     * @return read timeout in milliseconds.
     */
    public int getReadTimeout()
        {
        return this.readTimeout;
        }

    /**
     * Override the default read timeout of 40 seconds.
     *
     * @param readTimeout time a read may go without progress, in milliseconds. Note int, not long.
     */
    public void setReadTimeout( int readTimeout )
        {
        this.readTimeout = readTimeout;
        }

    /**
     * Get the Referrer, i.e. the name of the web page this request ostensibly came from.
     *
     * @return referrer e.g. "http://mindprod.com/index.html", null for none.
     * @see <a href="http://mindprod.com/jgloss/http.html">details on Referrer</a>
     */
    public String getReferer()
        {
        return this.referer;
        }

    /**
     * Set the Referrer, i.e. the name of the web page this request ostensibly came from.
     * Note that the word Referrer is spelled incorrectly as Referer in the HTTP spec.
     *
     * @param referer e.g. "http://mindprod.com/index.html", null for none.
     *
     * @see <a href="http://mindprod.com/jgloss/http.html">details on Referrer</a>
     */
    public void setReferer( String referer )
        {
        this.referer = referer;
        }

    /**
     * Get the numeric response code recorded for the most recent post/get.
     * Meanings of the various codes are described at HttpURLConnection and at http://mindprod.com/jgloss/http.html
     *
     * @return responseCode
     * @see java.net.HttpURLConnection
     */
    public int getResponseCode()
        {
        return this.responseCode;
        }

    /**
     * Get the response message for the most recent post/get, tidied to standard wording.
     *
     * @return standard wording for a known response code, "no connect" for code -1,
     *         otherwise the raw response message.
     * @see #getRawResponseMessage
     */
    public String getResponseMessage()
        {
        final int code = this.responseCode;
        final String raw = this.responseMessage;
        return responseCodeToResponseMessage( code, raw );
        }

    /**
     * Get the URL used for this connection.
     *
     * @return URL, including encoded GET parameters, but not POST parameters.
     */
    public URL getURL()
        {
        return this.url;
        }

    /**
     * Get the value that will be sent in the User-Agent request header.
     *
     * @return e.g. "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0.1) Gecko/20100101 Firefox/8.0.1", null for none.
     */
    public String getUserAgent()
        {
        return this.userAgent;
        }

    /**
     * Override the default User-Agent.
     *
     * @param userAgent User-Agent a browser uses in an HTTP header to identify itself.
     *                  null for no User-Agent header. By default you get a Firefox User-Agent.
     *
     * @see <a href="http://mindprod.com/jgloss/http.html">details on User-Agent</a>
     */
    public void setUserAgent( String userAgent )
        {
        this.userAgent = userAgent;
        }

    /**
     * Do we follow redirects, or just show the first leg?
     *
     * @return true if redirects will be followed automatically.
     */
    public boolean isFollowRedirects()
        {
        return this.followRedirects;
        }

    /**
     * Control whether redirects are automatically followed or treated as errors.
     * The setting is applied to HTTP connections when the standard properties are set up.
     *
     * @param followRedirects true=auto follow, false=treat as error.
     *
     * @see java.net.HttpURLConnection#setInstanceFollowRedirects(boolean)
     */
    public void setInstanceFollowRedirects( boolean followRedirects )
        {
        this.followRedirects = followRedirects;
        }

    /**
     * Set the parms that will be sent tacked onto the end of the URL, get-style.
     * Replaces any previously set parms.
     *
     * @param parms 0..n strings to be sent as parameters, alternating keyword/value.
     *              null is treated as no parms.
     *
     * @throws IllegalArgumentException if an odd number of strings is given.
     * @see Post#setPostParms(String...)
     */
    public void setParms( final String... parms )
        {
        // Validate with a real exception rather than an assert, so the check still works when
        // assertions are disabled, the same way setRequestProperties validates its pairs.
        if ( parms != null && ( parms.length & 1 ) != 0 )
            {
            throw new IllegalArgumentException( "must have an even number of parms, keyword=value" );
            }
        this.parms = parms;
        }

    /**
     * Set the additional request properties for the connection, replacing any previous set.
     *
     * @param requestProperties alternating key, value strings; the count must be even.
     *
     * @throws IllegalArgumentException if an odd number of strings is given.
     */
    public void setRequestProperties( String... requestProperties )
        {
        final boolean completePairs = requestProperties.length % 2 == 0;
        if ( !completePairs )
            {
            throw new IllegalArgumentException( "setRequestProperties needs an even number of parameters: key,value" );
            }
        this.requestProperties = requestProperties;
        }

    // -------------------------- STATIC METHODS --------------------------

    // Build the table of standard wordings, indexed by responseCode - 200.
    // Codes in 200..505 with no entry stay null and fall back to the raw server message.
    static
        {
        responseCodeLookup = new String[ 505 - 200 + 1 ];
        means( 200, "ok" );
        means( 201, "created" );
        means( 202, "accepted" );
        means( 203, "non-authoritative information" );
        means( 204, "no content" );
        means( 205, "reset content" );
        means( 206, "partial content" );
        means( 300, "multiple choices" );
        means( 301, "object permanently moved" );
        means( 302, "object temporarily moved" );
        means( 303, "access method changed" );
        means( 304, "not modified since last access" );
        means( 305, "use proxy" );
        means( 307, "temporary redirect" );
        means( 400, "bad request" );
        means( 401, "unauthorized. must logon first." );
        means( 402, "payment required" );
        means( 403, "forbidden" );
        means( 404, "page not found" );
        means( 405, "method not allowed" );
        means( 406, "not acceptable" );
        means( 407, "proxy authentication required" );
        means( 408, "request time-out" );
        means( 409, "conflict" );
        means( 410, "gone" );
        means( 411, "length required" );
        means( 412, "precondition failed" );
        means( 413, "request entity too large" );
        means( 414, "request-uri too large" );
        means( 415, "unsupported media type" );
        means( 416, "requested range not satisfiable" );
        means( 417, "expectation failed" );
        // 500 used to be registered twice; only the second wording ever took effect, so only it is kept.
        means( 500, "internal server error" );
        means( 501, "not implemented" );
        means( 502, "bad gateway" );
        means( 503, "service unavailable" );
        means( 504, "gateway timeout" );
        means( 505, "http version not supported" );
        }

    /**
     * Print the header fields of a connection to System.out, one line per header,
     * in the form key: [value] [value] [value]
     *
     * @param title heading printed on its own line ahead of the headers.
     * @param urlc  HTTP connection whose header fields are dumped.
     */
    protected static void dumpHeaders( final String title, final HttpURLConnection urlc )
        {
        out.println( title );
        final Map<String, List<String>> headerFields = urlc.getHeaderFields();
        for ( Map.Entry<String, List<String>> header : headerFields.entrySet() )
            {
            // assemble the complete line first, then emit it in one go.
            final StringBuilder line = new StringBuilder();
            line.append( header.getKey() ).append( ':' );
            for ( String headerValue : header.getValue() )
                {
                line.append( " [" ).append( headerValue ).append( ']' );
                }
            out.println( line.toString() );
            }
        }

    /**
     * URL-encode alternating keyword/value strings into a query string of the form ?k1=v1&amp;k2=v2
     * This method does not automatically include the result in the message sent to the host.
     *
     * @param encoding charset whose name is handed to URLEncoder
     * @param parms    0..n strings to be sent as parameters, alternating keyword/value
     *
     * @return all the parms in one encoded string with lead ?, or "" when there are no parms.
     * @throws java.io.UnsupportedEncodingException
     *          if bad encoding
     */
    private static String encodeParms( Charset encoding, String... parms ) throws UnsupportedEncodingException
        {
        // for post, will usually have empty list of parms for command.
        final int count = parms == null ? 0 : parms.length;
        if ( count == 0 )
            {
            return "";
            }
        assert count % 2 == 0 : "must have an even number of parms, keyword=value";
        // presize the builder: raw lengths, one separator each, plus a little slack for escapes.
        int capacity = 10;
        for ( String parm : parms )
            {
            capacity += parm.length() + 1;
            }
        final StringBuilder encoded = new StringBuilder( capacity );
        char separator = '?';
        for ( int k = 0; k + 1 < count; k += 2 )
            {
            final String charsetName = encoding.name();
            encoded.append( separator );
            encoded.append( URLEncoder.encode( parms[ k ], charsetName ) );
            encoded.append( '=' );
            encoded.append( URLEncoder.encode( parms[ k + 1 ], charsetName ) );
            // every pair after the first is joined with an ampersand.
            separator = '&';
            }
        return encoded.toString();
        }

    /**
     * Guess what charSet encoding the response will be in, from its Content-Type field.
     * Looks for the last charset= parameter (matched without regard to case), ignores any parameters
     * following it, and strips enclosing double quotes from the name.
     * Falls back to the default, with a warning on System.err, when the named charset is illegal
     * or not supported by this JVM.
     *
     * @param contentType     contents of content type field, e.g. "text/html; charset=utf-8". May be null.
     * @param defaultEncoding charSet to use if the content type names no usable charset, e.g. UTF-8
     *
     * @return charset to use to decode the response.
     */
    static Charset guessCharSet( final String contentType, final Charset defaultEncoding )
        {
        if ( contentType == null )
            {
            return defaultEncoding;
            }
        //  Content-Type: text/html; charset=utf-8
        final String marker = "charset=";
        // find the last marker. regionMatches ignores case without depending on the default locale.
        int place = -1;
        for ( int i = contentType.length() - marker.length(); i >= 0; i-- )
            {
            if ( contentType.regionMatches( true, i, marker, 0, marker.length() ) )
                {
                place = i;
                break;
                }
            }
        if ( place < 0 )
            {
            return defaultEncoding;
            }
        String charset = contentType.substring( place + marker.length() );
        // drop any parameters that follow the charset, e.g. "; boundary=xyz"
        final int semicolon = charset.indexOf( ';' );
        if ( semicolon >= 0 )
            {
            charset = charset.substring( 0, semicolon );
            }
        charset = charset.trim();
        // trim possible enclosing "
        int start = 0;
        int end = charset.length();
        while ( start < end && charset.charAt( start ) == '\"' )
            {
            start++;
            }
        while ( end > start && charset.charAt( end - 1 ) == '\"' )
            {
            end--;
            }
        charset = charset.substring( start, end ).trim();
        if ( charset.length() == 0 )
            {
            // charset= with nothing after it
            return defaultEncoding;
            }
        try
            {
            // charset names are matched case-insensitively by forName, no need to change case.
            return Charset.forName( charset );
            }
        catch ( IllegalArgumentException e )
            {
            // covers both IllegalCharsetNameException and UnsupportedCharsetException
            err.println( "Warning: unrecognised charset " + charset + " using " + defaultEncoding + " instead." );
            return defaultEncoding;
            }
        }

    /**
     * Record the standard wording for one response code in the lookup table.
     *
     * @param responseCode response code; the table slot is responseCode - 200.
     * @param meaning      corresponding meaning of the response code, the responseMessage
     */
    private static void means( int responseCode, String meaning )
        {
        final int slot = responseCode - 200;
        responseCodeLookup[ slot ] = meaning;
        }

    /**
     * Convert a responseCode to a standard responseMessage.
     *
     * @param responseCode       code e.g. 200 for OK
     * @param rawResponseMessage raw response message from server, used when the code has no standard wording.
     *
     * @return standard wording when the table has one, "no connect" for code -1,
     *         otherwise the raw response message.
     */
    private static String responseCodeToResponseMessage( int responseCode, String rawResponseMessage )
        {
        final boolean inTable = responseCode >= 200 && responseCode <= 505;
        final String standardWording = inTable ? responseCodeLookup[ responseCode - 200 ] : null;
        if ( standardWording != null )
            {
            return standardWording;
            }
        // no standard wording: either never connected, or an unknown code.
        return responseCode == -1 ? "no connect" : rawResponseMessage;
        }

    // --------------------------- CONSTRUCTORS ---------------------------

    /**
     * No public instantiation. The constructor is package-private and the class is abstract,
     * so Http serves only as a base class.
     */
    Http()
        {
        }

    // -------------------------- OTHER METHODS --------------------------

    /**
     * Get the get-style parms given to setParms, URL-encoded into one string of the form ?k1=v1&amp;k2=v2
     *
     * @param encoding for URLEncoder
     *
     * @return all the parms in one string encoded with lead ?, "" if there are none.
     * @throws java.io.UnsupportedEncodingException
     *          if bad encoding
     */
    String getEncodedParms( Charset encoding ) throws UnsupportedEncodingException
        {
        final String[] getStyleParms = this.parms;
        return encodeParms( encoding, getStyleParms );
        }

    /**
     * Process the response from the request we sent the server: connect, record the response code
     * and message, then read the body via Read.readStringBlocking.
     * The input stream is closed and the connection disconnected even when reading fails.
     *
     * @param defaultCharSet Encoding to use to interpret the result when the Content-Type names no usable charset.
     * @param urlc           the HttpURLConnection, all ready to go but for the connect.
     *
     * @return content of the response, decompressed, decoded.
     * @throws java.io.IOException if trouble reading the stream.
     */
    String processResponse( Charset defaultCharSet, HttpURLConnection urlc )
            throws IOException
        {
        try
            {
            // send the message.
            urlc.connect(); // ignored if already connected.
            // getResponseCode will block until the server responds.
            // save responseCode for later retrieval
            responseCode = urlc.getResponseCode();
            responseMessage = urlc.getResponseMessage();
            // get size of message. -1 means comes in an indeterminate number of chunks.
            int estimatedLength = urlc.getContentLength();
            if ( estimatedLength <= 0 )
                {
                // quite common for no length field
                estimatedLength = DEFAULT_LENGTH;
                }
            // InputStream gives us the raw bytes. We must decompress and decode the 8-bit chars..
            // actually a sun.net.www.protocol.http.HttpURLConnection.HttpInputStream
            final InputStream is = urlc.getInputStream();
            try
                {
                final String contentType = urlc.getContentType();
                final Charset charSet = guessCharSet( contentType, defaultCharSet );
                // content encoding might be null. We don't handle deflate or Unix compress.
                // content-coding names are case-insensitive, so compare ignoring case.
                final String contentEncoding = urlc.getContentEncoding();
                final boolean gzipped = "gzip".equalsIgnoreCase( contentEncoding )
                                        || "x-gzip".equalsIgnoreCase( contentEncoding );
                // R E A D
                final String result = Read.readStringBlocking( is,
                        estimatedLength,
                        gzipped,
                        charSet );
                if ( DEBUGGING )
                    {
                    out.println( "--------------------------------" );
                    out.println( "ResponseCode:" + responseCode );
                    out.println( "ResponseMessage:" + responseMessage );
                    out.println( "ContentType:" + contentType );
                    out.println( "CharSet:" + charSet );
                    out.println( "ContentEncoding:" + contentEncoding );
                    out.println( "Result:" + ( result == null ? "null" : result.substring( 0,
                            Math.min( result.length(), 300 ) ) ) );
                    }
                return result;
                }
            finally
                {
                // C L O S E, even if the read threw.
                is.close();
                }
            }
        finally
            {
            // always release the connection, whether or not the exchange succeeded.
            urlc.disconnect();
            }
        }

    /**
     * Set up the standard properties on the connection: timeouts, redirect handling, User-Agent,
     * Referer and the Accept, Accept-Charset, Accept-Encoding and Accept-Language headers.
     * The properties given to setRequestProperties are applied last, so they override
     * any of the standard headers with the same key.
     *
     * @param urlc Connection we are setting up.
     */
    protected void setStandardProperties( URLConnection urlc )
        {
        urlc.setConnectTimeout( connectTimeout );
        urlc.setReadTimeout( readTimeout );
        if ( userAgent != null )
            {
            urlc.setRequestProperty( "User-Agent", userAgent );
            }
        if ( urlc instanceof HttpURLConnection )
            {
            ( ( HttpURLConnection ) urlc ).setInstanceFollowRedirects( followRedirects );
            }
        if ( referer != null )
            {
            // note HTTP spells referrer incorrectly.
            urlc.setRequestProperty( "Referer", referer );
            }
        urlc.setRequestProperty( "Accept", acceptProperty );
        urlc.setRequestProperty( "Accept-Charset", acceptCharset );
        // no deflate, could be added later if we can find code to handle it.
        urlc.setRequestProperty( "Accept-Encoding", acceptEncoding );
        // relaxed, prefer the default locale's language
        final String acceptLanguage = acceptLanguageFor( Locale.getDefault() );
        if ( acceptLanguage != null )
            {
            // e.g. en-CA,en;q=0.9
            urlc.setRequestProperty( "Accept-Language", acceptLanguage );
            }
        // caller-supplied properties go last, otherwise the defaults above would silently overwrite them.
        for ( int i = 0; i < requestProperties.length; i += 2 )
            {
            urlc.setRequestProperty( requestProperties[ i ], requestProperties[ i + 1 ] );
            }
        }

    /**
     * Build an Accept-Language value from a locale's language and country only.
     * Locale.toString() is not used because it can append variant, script or extension parts
     * that do not belong in the header.
     *
     * @param locale locale to describe.
     *
     * @return e.g. "en-CA,en;q=0.9", or null if the locale has no language.
     */
    private static String acceptLanguageFor( final Locale locale )
        {
        final String language = locale.getLanguage();
        if ( language.length() == 0 )
            {
            // nothing sensible to send
            return null;
            }
        final String country = locale.getCountry();
        final String tag = country.length() == 0 ? language : language + "-" + country;
        return tag + "," + language + ";q=0.9";
        }
    }