/*
 * $Id: HtmlSaxParser.java,v 1.3 1999/04/04 19:34:43 db Exp $
 * 
 * Copyright (c) 1999 Sun Microsystems, Inc. All Rights Reserved.
 * 
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 * 
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
 * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
 * THIS SOFTWARE OR ITS DERIVATIVES.
 */

package com.sun.xml.xhtml;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;

import java.util.Enumeration;
import java.util.Locale;

import javax.swing.text.BadLocationException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.parser.*;

import org.xml.sax.*;
import org.xml.sax.helpers.AttributeListImpl;

import com.sun.xml.parser.LexicalEventListener;
import com.sun.xml.parser.Resolver;

// debug only:
// import com.sun.xml.tree.XmlDocumentBuilder;


/**
 * This is a SAX parser which processes HTML, producing an event stream
 * corresponding roughly to what would be produced when parsing a
 * well formed (but not in general valid) XHTML document.  It may be
 * useful when beginning to convert HTML content into (valid) XHTML
 * content, or when using DOM APIs to manipulate such HTML content.
 *
 * <P> The parser uses the HTML parser built into the SWING library, and
 * converts the events it reports into SAX event callbacks.  At this writing,
 * that parser understands HTML 3.2, a number of HTML 4.0 constructs at
 * least in part, and recovers from many common HTML errors in a manner
 * which is compatible with the handling of popular web browsers.
 *
 * <P> Note that the case of element and attribute names exposed by
 * this parser is always <em>normalized to lower case</em>.  This policy
 * is the one adopted by current W3C working drafts of <em>XHTML</em>.
 * It differs from that adopted by the Level 1 DOM HTML support, which
 * normalizes element names to uppercase and uppercases only the initial
 * letter of attribute names.
 *
 * <P> Also, note that when writing out HTML, you may want to avoid
 * character encodings such as UTF-8, since HTML processors are less
 * consistent in their support of internationalized text than are XML
 * processors.  ISO-8859-1 is safe, and newer HTML browsers tend to
 * handle other character encodings better than older ones.
 *
 * @see com.sun.xml.parser.LexicalEventListener
 * @see com.sun.xml.tree.XmlDocumentBuilder
 * @see javax.swing.text.html.parser.ParserDelegator
 *
 * @author David Brownell
 * @version $Revision: 1.3 $
 */
public class HtmlSaxParser
    extends     ParserCallback
    implements  org.xml.sax.Parser
{
    // Swing HTML parser; it reports content by calling back into this object.
    private ParserDelegator             parser;
    // One SAX attribute list, cleared and refilled for every element.
    private AttributeListImpl           attributes;
    // True once a SAX handler has thrown during the current parse; all
    // later Swing callbacks are then ignored.
    private boolean                     disabled;
    // First failure raised by a SAX handler during the current parse.
    // Kept in a field so that parse() can report it even when the
    // runtime exception thrown from the callback never reaches it.
    private DeferredException           pending;

    private DocumentHandler             docHandler;
    private DTDHandler                  dtdHandler;
    private ErrorHandler                errHandler;
    private EntityResolver              resolver;
    private Locale                      locale;
    private LexicalEventListener        lexicalHandler;


    /**
     * Constructs a SAX parser accessing the HTML parser built in to
     * the Swing subsystem.  All handlers default to a single
     * <code>HandlerBase</code> instance until replaced by the setters.
     */
    public HtmlSaxParser ()
    {
        HandlerBase base = new HandlerBase ();

        docHandler = base;
        dtdHandler = base;
        errHandler = base;
        resolver = base;

        parser = new ParserDelegator ();
        attributes = new AttributeListImpl ();
    }


    // SAX parser methods

    /**
     * Parses the HTML document provided through the specified
     * input source.  The source's character stream is used if present,
     * otherwise its byte stream (decoded with the declared encoding, or
     * ISO-8859-1 when none is declared), otherwise its system ID is
     * opened as a URL.
     *
     * <P> This parser may be reused: state left over from an earlier
     * parse that was aborted by a handler exception is cleared first.
     *
     * @param in the document to parse
     * @exception SAXException if the input source provides no way to
     *  read the document, or if one of the SAX handlers throws one;
     *  handler exceptions which are <code>SAXParseException</code>s are
     *  also passed to the error handler's <code>fatalError</code> method
     * @exception IOException on failures opening or reading the document
     */
    public void parse (InputSource in) throws SAXException, IOException
    {
        Reader          reader = in.getCharacterStream ();
        boolean         opened = false;

        //
        // XXX must check the character set handling here,
        // along both paths !!!!
        //
        if (reader == null)
            reader = decode (in);

        if (reader == null) {
            String      systemId = in.getSystemId ();
            InputSource resolved;

            if (systemId == null)
                throw new SAXException (
                    "Bad InputSource: no Reader, InputStream, or URI");

            resolved = Resolver.createInputSource (
                    new java.net.URL (systemId), false);
            reader = resolved.getCharacterStream ();
            if (reader == null)
                reader = decode (resolved);
            if (reader == null)
                throw new SAXException (
                    "Bad InputSource: could not open " + systemId);

            // this stream was opened here, so it gets closed here
            opened = true;
        }

        // forget any failure left behind by a previous, aborted parse
        disabled = false;
        pending = null;

        try {
            docHandler.startDocument ();
            try {
                parser.parse (reader, this, true);
            } catch (DeferredException x) {
                if (pending == null)
                    pending = x;
            }

            // The failure is looked up in the field rather than relying
            // only on the catch clause above:  the Swing parser keeps
            // calling back after a callback throws, and may not let
            // that runtime exception propagate out of its parse method.
            if (pending != null) {
                DeferredException       x = pending;

                pending = null;
                rethrow (x);
            }
            docHandler.endDocument ();

        } finally {
            if (opened) {
                try {
                    reader.close ();
                } catch (IOException ignored) {
                    // best effort only; a failure to close must not
                    // hide the outcome of the parse itself
                }
            }
        }
    }

    //
    // Wraps the byte stream of an input source in a reader, using the
    // declared encoding or else ISO-8859-1.  Returns null when the
    // input source has no byte stream.
    //
    private static Reader decode (InputSource in) throws IOException
    {
        InputStream     input = in.getByteStream ();
        String          encoding;

        if (input == null)
            return null;
        encoding = in.getEncoding ();
        if (encoding == null)
            encoding = "8859_1";
        return new InputStreamReader (input, encoding);
    }

    //
    // Unwraps a deferred failure and throws it to the parse() caller,
    // first giving the error handler a chance to see parse exceptions.
    // This never returns normally.
    //
    private void rethrow (DeferredException x)
    throws SAXException, IOException
    {
        if (x.ioe != null)
            throw x.ioe;
        if (x.x == null)
            throw x;            // nothing wrapped; don't "throw null"
        if (x.x instanceof SAXParseException)
            errHandler.fatalError ((SAXParseException) x.x);
        throw x.x;
    }

    //
    // Records a SAX handler failure:  stops all further event
    // reporting, remembers the first such failure for parse(), and
    // returns the runtime exception the calling callback should throw.
    //
    private DeferredException defer (SAXException e)
    {
        DeferredException       x = new DeferredException ();

        x.x = e;
        disabled = true;
        if (pending == null)
            pending = x;
        return x;
    }


    /**
     * Parses the HTML document provided at the specified URI.
     *
     * @param uri system ID (URL) of the document
     * @exception SAXException as for <code>parse (InputSource)</code>
     * @exception IOException as for <code>parse (InputSource)</code>
     */
    public void parse (String uri) throws SAXException, IOException
    {
        parse (new InputSource (uri));
    }

    /**
     * Assigns the document handler through which all HTML content will
     * be reported.  This is the primary application interface to this
     * parser.
     *
     * <P> If this handler implements the lexical event listener interface,
     * it will also be notified of comments.
     */
    public void setDocumentHandler (DocumentHandler handler)
    {
        docHandler = handler;
        if (handler instanceof LexicalEventListener)
            lexicalHandler = (LexicalEventListener) handler;
        else
            lexicalHandler = null;
    }

    /**
     * Not useful with any HTML parser, since HTML does not use
     * notations or unparsed entities.
     */
    public void setDTDHandler (DTDHandler handler)
        { dtdHandler = handler; }

    /**
     * Not useful with this HTML parser, since it does not read
     * doctype declarations.
     */
    public void setEntityResolver (EntityResolver resolver)
        { this.resolver = resolver; }

    /**
     * Provides the error handler used to report fatal errors
     * and warnings.
     */
    public void setErrorHandler (ErrorHandler handler)
        { errHandler = handler; }

    /**
     * Not useful with this HTML parser, since no control over the
     * language of its diagnostics is provided by the Java runtime.
     */
    public void setLocale (Locale locale)
        { this.locale = locale; }


    // Swing HTML ParserCallback methods

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     */
    public void flush () throws BadLocationException
    {
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     * Reports the whole array as SAX character data.
     */
    public void handleText (char data [], int pos)
    {
        if (disabled)
            return;

        try {
            docHandler.characters (data, 0, data.length);
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method; it is the hook through
     * which the Swing parser delivers comments, and it forwards them to
     * <code>comment</code>.
     */
    public void handleComment (char data [], int pos)
    {
        comment (data, pos);
    }

    /**
     * Not intended for application use.
     * Reports a comment to the lexical event listener, if the document
     * handler is one.  This is not itself a ParserCallback method; it is
     * retained for compatibility and is invoked by
     * <code>handleComment</code>.
     */
    public void comment (char data [], int pos)
    {
        if (disabled || lexicalHandler == null)
            return;

        try {
            lexicalHandler.comment (new String (data));
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     * Reports the start of an element which has content.
     */
    public void handleStartTag (
        Tag                     tag,
        MutableAttributeSet     attributes,
        int                     pos
    ) {
        if (disabled)
            return;

        try {
            docHandler.startElement (tag.toString (),
                                toAttributeList (attributes));
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     * Reports the end of an element which has content.
     */
    public void handleEndTag (
        Tag                     tag,
        int                     pos
    ) {
        if (disabled)
            return;

        try {
            docHandler.endElement (tag.toString ());
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     * Reports an empty element as a start event immediately followed
     * by the matching end event.
     */
    public void handleSimpleTag (
        Tag                     tag,
        MutableAttributeSet     attributes,
        int                     pos
    ) {
        if (disabled)
            return;

        try {
            String      tagName = tag.toString ();

            docHandler.startElement (tagName, toAttributeList (attributes));
            docHandler.endElement (tagName);
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    /**
     * Not intended for application use.
     * This is a Swing HTML ParserCallback method.
     * Reports the diagnostic as a SAX warning carrying no location
     * information.  Like the other callbacks, this does nothing once
     * a handler failure has aborted reporting.
     */
    public void handleError (String diagnostic, int pos)
    {
        if (disabled)
            return;

        try {
            errHandler.warning (new SAXParseException (
                diagnostic,
                null, null,
                -1, -1
                ));
        } catch (SAXException e) {
            throw defer (e);
        }
    }

    //
    // Convert Swing's model of an attribute set to SAX's.  The shared
    // list is reused, so the result is only valid until the next call.
    // Every attribute is reported with type CDATA, using the string
    // forms of its key and value.
    //
    // NOTE(review):  every key in the set is passed through, including
    // any synthetic marker attributes the Swing parser may add -- confirm
    // whether those should be filtered out.
    //
    private AttributeListImpl toAttributeList (MutableAttributeSet attrs)
    {
        attributes.clear ();
        for (Enumeration e = attrs.getAttributeNames ();
                e.hasMoreElements ();
                ) {
            Object      key = e.nextElement ();
            String      name = key.toString ();
            String      value = attrs.getAttribute (key).toString ();

            attributes.addAttribute (name, "CDATA", value);
        }
        return attributes;
    }

    //
    // We wrap an exception (only one!) in this class and throw them.
    // The HotJava parser seems to continue reporting events after we
    // throw ... so remember to disable ongoing reporting!!
    //
    static class DeferredException extends RuntimeException
    {
        SAXException    x;
        IOException     ioe;
    }


/*
    // TESTING ONLY

    public static void main (String argv [])
    {
        HtmlSaxParser           parser = new HtmlSaxParser ();
        XmlDocumentBuilder      builder = new XmlDocumentBuilder ();

        try {
            parser.setDocumentHandler (builder);
            parser.parse (argv [0]);
            builder.getDocument ().write (System.out);

        } catch (Throwable t) {
            t.printStackTrace ();
        }
    }
*/
}
