util/parser/Sax2XercesParser.cpp

Go to the documentation of this file.
00001 /*-----------------------------------------------------------------------------
00002 Name:      Sax2XercesParser.cpp
00003 Project:   xmlBlaster.org
00004 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file
00005 Comment:   Default handling of Sax callbacks
00006 -----------------------------------------------------------------------------*/
00007 
00008 #ifndef _UTIL_PARSER_SAX2XERCESPARSER_C
00009 #define _UTIL_PARSER_SAX2XERCESPARSER_C
00010 
00011 #if defined(XMLBLASTER_MSXML_PLUGIN)
00012 #  error Implement Microsoft XML parser for /DXMLBLASTER_MSXML_PLUGIN
00013 #else  // XMLBLASTER_XERCES_PLUGIN
00014 
00015 #if defined(_WIN32)
00016   #pragma warning(disable:4786)
00017 #endif
00018 
00019 #include <util/parser/Sax2XercesParser.h>
00020 #include <xercesc/sax/SAXException.hpp>
00021 #include <xercesc/sax2/SAX2XMLReader.hpp>
00022 #include <xercesc/sax2/XMLReaderFactory.hpp>
00023 #include <xercesc/util/PlatformUtils.hpp>
00024 #include <xercesc/framework/MemBufInputSource.hpp>
00025 #include <util/XmlBlasterException.h>
00026 #include <util/Global.h>
00027 #include <util/lexical_cast.h>
00028 #include <iostream>
00029 //#include <cstdlib> //<stdlib.h>
00030 
00031 namespace org { namespace xmlBlaster { namespace util { namespace parser {
00032 
00033 using namespace std;
00034 
// Size in bytes of the chunk buffer used when transcoding XMLCh text to the
// configured xmlBlaster encoding; also passed as block size when the
// transcoder is created.
static const int ENCODERBUFFERSIZE = 16 * 1024;
00036 
00037 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) : 
00038     I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")),
00039     xmlBlasterTranscoder_(0)
00040 {
00041    if (log_.call()) log_.trace(ME, "Creating new Sax2Parser");
00042 
00043    //"UTF-8" is currently not supported with our std::string usage!
00044    encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1");
00045 
00046    XMLTransService::Codes resCode;
00047    xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE);
00048    if (resCode != 0/*XMLTransService::Codes::Ok*/) {
00049       log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) +
00050                      ". Please check your SAX parser setting '-xmlBlaster/encoding'");
00051       throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode));
00052    }
00053    else {
00054       if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_);
00055    }
00056 }
00057 
00058 Sax2Parser::~Sax2Parser()
00059 {
00060    delete xmlBlasterTranscoder_;
00061 }
00062 
00063 std::string Sax2Parser::usage() 
00064 {
00065    std::string text = string("");
00066    //text += string("\n");
00067    text += string("\nThe xerces SAX XML parser plugin configuration:");
00068    text += string("\n   -xmlBlaster/encoding [iso-8859-1]");
00069    text += string("\n                       The parser encoding to use for xmlBlaster specific QoS and key SAX parsing");
00070    text += string("\n");
00071    return text;
00072 }
00073 
00074 void Sax2Parser::init(const string &xmlLiteral) 
00075 {
00076    if (xmlLiteral.size() > 0) {
00077       parse(xmlLiteral);
00078    }
00079 }
00080       
00085 void Sax2Parser::parse(const string &xmlData) 
00086 {
00087    //if (log_.call()) log_.call(ME, "parse");
00088    //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'"));
00089  
00090    SAX2XMLReader *parser = NULL;
00091    //XMLCh* encodingHelper = NULL;
00092    try {
00093       parser = XMLReaderFactory::createXMLReader();
00094       parser->setContentHandler(this);
00095       parser->setErrorHandler(this);
00096       parser->setLexicalHandler(this);
00097 
00098       // "UTF-8"  "iso-8859-1"
00099       //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData;
00100       const string &xmlData1 = xmlData;
00101       //log_.info(ME, "Parsing now: " + xmlData1);
00102       
00103       MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false);
00104       
00105       XMLCh tempStr[100];
00106       XMLString::transcode(encoding_.c_str(), tempStr, 99);
00107       inSource.setEncoding(tempStr);
00108       //encodingHelper = XMLString::transcode(encoding.c_str());
00109       //inSource.setEncoding(encodingHelper);
00110       //Sax2Parser::releaseXMLCh(&encodingHelper);
00111 
00112       parser->parse(inSource);
00113       delete parser;
00114    }
00115    catch (StopParseException&) {
00116       // If it does not work, it could be wrapped into SAXParseException
00117       log_.error(ME, string("StopParseException: ") +
00118                               "Parsing execution stopped half the way ");
00119       if (log_.trace()) {
00120          string help = XmlBlasterException::getStackTrace();
00121          log_.plain(ME, help);
00122       }
00123       delete parser; // just in case it did not 
00124       return;
00125    }
00126    catch (XmlBlasterException& ex) {
00127       throw ex;
00128    }
00129    catch (SAXParseException &err) {
00130       string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage());
00131       delete parser;
00132       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc);
00133    }
00134    catch (SAXNotRecognizedException &err) {
00135       string msg = getStringValue(err.getMessage());
00136       delete parser;
00137       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg);
00138         }
00139    catch (SAXNotSupportedException &err) {
00140       string msg = getStringValue(err.getMessage());
00141       delete parser;
00142       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg);
00143         }
00144    catch (const XMLException &err) {
00145       string msg = getStringValue(err.getMessage());
00146       delete parser;
00147       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg);
00148    }
00149    catch (SAXException &err) {
00150       string msg = getStringValue(err.getMessage());
00151       delete parser;
00152       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg);
00153    }
00154    catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ...
00155       string msg = err.what() + string(": ") + xmlData;
00156       delete parser;
00157       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg);
00158    }
00159    catch (const string& err) {
00160      string msg = err;
00161      delete parser;
00162      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData);
00163    }
00164    catch (const char* err) {
00165      string msg = err;
00166      delete parser;
00167      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData);
00168    }
00169    catch (...) {
00170      delete parser;
00171      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData);
00172    }
00173 }
00174 
00176 void Sax2Parser::endDocument() 
00177 {
00178    if (log_.call()) log_.call(ME, string("endDocument"));
00179    handler_->endDocument();
00180 }
00181 
00183 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname)
00184 {
00185    //if (log_.call()) log_.call(ME, string("endElement"));
00186    handler_->endElement(getStringValue(qname));
00187 }
00188 
00190 void Sax2Parser::startDocument()
00191 {
00192    //if (log_.call()) log_.call(ME, string("startDocument"));
00193    handler_->startDocument();
00194 }
00195 
00197 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs)
00198 {
00199    //if (log_.call()) log_.call(ME, "startElement <" + name + ">");
00200    AttributeMap tmpMap;
00201    handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs));
00202 }
00203 
00205 void Sax2Parser::endCDATA()
00206 {
00207    //if (log_.call()) log_.call(ME, string("endCDATA"));
00208    handler_->endCDATA();
00209 }
00210 
00212 void Sax2Parser::startCDATA()
00213 {
00214    //if (log_.call()) log_.call(ME, string("startCDATA"));
00215    handler_->startCDATA();
00216 }
00217 
00219 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length)
00220 {
00221    //if (log_.call()) log_.call(ME, string("characters"));
00222    string tmp;
00223    bool doTrim = false;
00224    tmp.assign(getStringValue(chars, doTrim), 0, length);
00225    handler_->characters(tmp);
00226 }
00227 
00228 //
00229 // ErrorHandler methods
00230 //
00231 
00233 void Sax2Parser::warning(const SAXParseException &ex) 
00234 {
00235    if (log_.call()) log_.call(ME, string("warning"));
00236    string txt = getLocationString(ex) + "\n";
00237    handler_->warning(txt);
00238 }
00239       
00240       
00242 void Sax2Parser::error(const SAXParseException &ex) 
00243 {
00244    if (log_.call()) log_.call(ME, string("error"));
00245    string txt = getLocationString(ex) + "\n";
00246    handler_->error(txt);
00247 }
00248 
00249 
00251 void Sax2Parser::fatalError(const SAXParseException &ex) 
00252 {
00253    if (log_.call()) log_.call(ME, string("fatalError"));
00254    string txt = getLocationString(ex) + "\n";
00255    handler_->fatalError(txt);
00256 }
00257 
00258 
00260 string Sax2Parser::getLocationString(const SAXParseException &ex) 
00261 {
00262   string systemId = getStringValue(ex.getSystemId());
00263   string str;
00264   if (systemId != "") {
00265     string::size_type index = systemId.find_last_of('/');
00266     if (index != string::npos) systemId = systemId.substr(index + 1);
00267     str = systemId + ":";
00268   }
00269   string message = Sax2Parser::getStringValue(ex.getMessage(), true);
00270   return str + "line=" + lexical_cast<std::string>(ex.getLineNumber()) 
00271       + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message;
00272 }
00273 
00274 
00282 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2) 
00283 {
00284   XMLCh* name1Helper = XMLString::replicate(name1);
00285   XMLString::upperCase(name1Helper);
00286   XMLCh* name2Helper = XMLString::transcode(name2);
00287   XMLString::upperCase(name2Helper);
00288   bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0);
00289   Sax2Parser::releaseXMLCh(&name1Helper);
00290   Sax2Parser::releaseXMLCh(&name2Helper);
00291   return ret;
00292 }
00293 
00294 
00298 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const
00299 {
00300    /* Works only with US-ASCII:
00301    char* help = 0;
00302    try {
00303       string ret;
00304       help = XMLString::transcode(value);
00305       if (help != 0) {
00306          if (doTrim) ret = StringTrim::trim(help);
00307          else ret = string(help);
00308          Sax2Parser::releaseXMLCh(&help);
00309       }
00310    }
00311    catch (...) {
00312       if (help != 0)
00313          Sax2Parser::releaseXMLCh(&help);
00314       cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl;
00315       // throw;
00316    }
00317    */
00318    if (value == NULL) {
00319       return "";
00320    }
00321 
00322 /*
00323 Converts from the encoding of the service to the internal XMLCh* encoding.
00324 unsigned int
00325 XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
00326                                 , const unsigned int            srcCount
00327                                 ,       XMLCh* const            toFill
00328                                 , const unsigned int            maxChars
00329                                 ,       unsigned int&           bytesEaten
00330                                 ,       unsigned char* const    charSizes)
00331 */
00332 /*
00333 Converts from the internal XMLCh* encoding to the encoding of the service.
00334 Parameters:
00335     srcData     the source buffer to be transcoded
00336     srcCount    number of characters in the source buffer
00337     toFill      the destination buffer
00338     maxBytes    the max number of bytes in the destination buffer
00339     charsEaten  after transcoding, this will hold the number of chars that were processed from the source buffer
00340     options     options to pass to the transcoder that explain how to respond to an unrepresentable character
00341 
00342 Returns:
00343     Returns the number of chars put into the target buffer 
00344 unsigned int
00345 XMLUTF8Transcoder::transcodeTo( const   XMLCh* const    srcData
00346                                 , const unsigned int    srcCount
00347                                 ,       XMLByte* const  toFill
00348                                 , const unsigned int    maxBytes
00349                                 ,       unsigned int&   charsEaten
00350                                 , const UnRepOpts       options)
00351 
00352 */
00353 
00354    unsigned int charsEatenFromSource = 0;
00355    unsigned int counter = 0;
00356    string result;
00357    unsigned int charsToRead = XMLString::stringLen(value);
00358    do {
00359       char resultXMLString_Encoded[ENCODERBUFFERSIZE+4];
00360       *resultXMLString_Encoded = 0;
00361       charsEatenFromSource = 0;
00362       int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
00363                                     XMLString::stringLen(value)-counter,
00364                                     (XMLByte*) resultXMLString_Encoded,
00365                                     ENCODERBUFFERSIZE,
00366                                     charsEatenFromSource,
00367                                     XMLTranscoder::UnRep_Throw );
00368 
00369       /*
00370       log_.info(ME,"TRANSCODE TMP: got '" + result +
00371                    "' charsToRead= " + lexical_cast<string>(charsToRead) +
00372                    "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) +
00373                    " charsEaten=" + lexical_cast<string>(charsEatenFromSource) +
00374                    " counter=" + lexical_cast<string>(counter) +
00375                    " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget));
00376       */
00377       if (charsEatenFromSource < 1)
00378          break;
00379       result += string(resultXMLString_Encoded, charsPutToTarget);
00380       counter += charsEatenFromSource;
00381    }
00382    while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE);
00383 
00384    //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource));
00385 
00386    if (doTrim) StringTrim::trim(result);
00387 
00388    return result;
00389 }
00390 
00391 
00392 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs)
00393 {
00394    int len = attrs.getLength();
00395    for (int i = 0; i < len; i++) {
00396       attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i));
00397    }
00398    return attrMap;
00399 }
00400 
00407 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const
00408 {
00409    const XMLCh* tmp = attrs.getValue(name);
00410    if (!tmp) return false;
00411 
00412    char* help1 = NULL;
00413    try {
00414       help1 = XMLString::transcode(tmp);
00415       if (!help1) return false;
00416       if (doTrim) {
00417          value.assign(StringTrim::trim(help1));
00418       }
00419       else value.assign(help1);
00420    }
00421    catch (...) {}
00422    Sax2Parser::releaseXMLCh(&help1);
00423    return true;
00424 }
00425 
00426 
00427 void Sax2Parser::releaseXMLCh(XMLCh** data)
00428 {
00429 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
00430    XMLString::release(data);
00431 #else
00432    delete [] *data;
00433    *data = 0;
00434 #endif
00435 }
00436 
00437 void Sax2Parser::releaseXMLCh(char** data)
00438 {
00439 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
00440    XMLString::release(data);
00441 #else
00442    delete [] *data;
00443    *data = 0;
00444 #endif
00445 }
00446 
00447 #endif  // XMLBLASTER_XERCES_PLUGIN
00448 
00449 }}}} // namespace
00450 #endif // _UTIL_PARSER_SAX2XERCESPARSER_C
00451