00001 /*----------------------------------------------------------------------------- 00002 Name: Sax2XercesParser.cpp 00003 Project: xmlBlaster.org 00004 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file 00005 Comment: Default handling of Sax callbacks 00006 -----------------------------------------------------------------------------*/ 00007 00008 #ifndef _UTIL_PARSER_SAX2XERCESPARSER_C 00009 #define _UTIL_PARSER_SAX2XERCESPARSER_C 00010 00011 #if defined(XMLBLASTER_MSXML_PLUGIN) 00012 # error Implement Microsoft XML parser for /DXMLBLASTER_MSXML_PLUGIN 00013 #else // XMLBLASTER_XERCES_PLUGIN 00014 00015 #if defined(_WIN32) 00016 #pragma warning(disable:4786) 00017 #endif 00018 00019 #include <util/parser/Sax2XercesParser.h> 00020 #include <xercesc/sax/SAXException.hpp> 00021 #include <xercesc/sax2/SAX2XMLReader.hpp> 00022 #include <xercesc/sax2/XMLReaderFactory.hpp> 00023 #include <xercesc/util/PlatformUtils.hpp> 00024 #include <xercesc/framework/MemBufInputSource.hpp> 00025 #include <util/XmlBlasterException.h> 00026 #include <util/Global.h> 00027 #include <util/lexical_cast.h> 00028 #include <iostream> 00029 //#include <cstdlib> //<stdlib.h> 00030 00031 namespace org { namespace xmlBlaster { namespace util { namespace parser { 00032 00033 using namespace std; 00034 00035 static const int ENCODERBUFFERSIZE = 16*1024; 00036 00037 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) : 00038 I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")), 00039 xmlBlasterTranscoder_(0) 00040 { 00041 if (log_.call()) log_.trace(ME, "Creating new Sax2Parser"); 00042 00043 //"UTF-8" is currently not supported with our std::string usage! 00044 encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1"); 00045 00046 XMLTransService::Codes resCode; 00047 xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE); 00048 if (resCode != 0/*XMLTransService::Codes::Ok*/) { 00049 log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) + 00050 ". Please check your SAX parser setting '-xmlBlaster/encoding'"); 00051 throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode)); 00052 } 00053 else { 00054 if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_); 00055 } 00056 } 00057 00058 Sax2Parser::~Sax2Parser() 00059 { 00060 delete xmlBlasterTranscoder_; 00061 } 00062 00063 std::string Sax2Parser::usage() 00064 { 00065 std::string text = string(""); 00066 //text += string("\n"); 00067 text += string("\nThe xerces SAX XML parser plugin configuration:"); 00068 text += string("\n -xmlBlaster/encoding [iso-8859-1]"); 00069 text += string("\n The parser encoding to use for xmlBlaster specific QoS and key SAX parsing"); 00070 text += string("\n"); 00071 return text; 00072 } 00073 00074 void Sax2Parser::init(const string &xmlLiteral) 00075 { 00076 if (xmlLiteral.size() > 0) { 00077 parse(xmlLiteral); 00078 } 00079 } 00080 00085 void Sax2Parser::parse(const string &xmlData) 00086 { 00087 //if (log_.call()) log_.call(ME, "parse"); 00088 //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'")); 00089 00090 SAX2XMLReader *parser = NULL; 00091 //XMLCh* encodingHelper = NULL; 00092 try { 00093 parser = XMLReaderFactory::createXMLReader(); 00094 parser->setContentHandler(this); 00095 parser->setErrorHandler(this); 00096 parser->setLexicalHandler(this); 00097 00098 // "UTF-8" "iso-8859-1" 00099 //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData; 00100 const string &xmlData1 = xmlData; 00101 //log_.info(ME, "Parsing now: " + xmlData1); 00102 00103 MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false); 00104 00105 XMLCh tempStr[100]; 00106 XMLString::transcode(encoding_.c_str(), tempStr, 99); 00107 inSource.setEncoding(tempStr); 00108 //encodingHelper = XMLString::transcode(encoding.c_str()); 00109 //inSource.setEncoding(encodingHelper); 00110 //Sax2Parser::releaseXMLCh(&encodingHelper); 00111 00112 parser->parse(inSource); 00113 delete parser; 00114 } 00115 catch (StopParseException&) { 00116 // If it does not work, it could be wrapped into SAXParseException 00117 log_.error(ME, string("StopParseException: ") + 00118 "Parsing execution stopped half the way "); 00119 if (log_.trace()) { 00120 string help = XmlBlasterException::getStackTrace(); 00121 log_.plain(ME, help); 00122 } 00123 delete parser; // just in case it did not 00124 return; 00125 } 00126 catch (XmlBlasterException& ex) { 00127 throw ex; 00128 } 00129 catch (SAXParseException &err) { 00130 string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage()); 00131 delete parser; 00132 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc); 00133 } 00134 catch (SAXNotRecognizedException &err) { 00135 string msg = getStringValue(err.getMessage()); 00136 delete parser; 00137 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg); 00138 } 00139 catch (SAXNotSupportedException &err) { 00140 string msg = getStringValue(err.getMessage()); 00141 delete parser; 00142 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg); 00143 } 00144 catch (const XMLException &err) { 00145 string msg = getStringValue(err.getMessage()); 00146 delete parser; 00147 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg); 00148 } 00149 catch (SAXException &err) { 00150 string msg = getStringValue(err.getMessage()); 00151 delete parser; 00152 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg); 00153 } 00154 catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ... 00155 string msg = err.what() + string(": ") + xmlData; 00156 delete parser; 00157 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg); 00158 } 00159 catch (const string& err) { 00160 string msg = err; 00161 delete parser; 00162 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData); 00163 } 00164 catch (const char* err) { 00165 string msg = err; 00166 delete parser; 00167 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData); 00168 } 00169 catch (...) { 00170 delete parser; 00171 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData); 00172 } 00173 } 00174 00176 void Sax2Parser::endDocument() 00177 { 00178 if (log_.call()) log_.call(ME, string("endDocument")); 00179 handler_->endDocument(); 00180 } 00181 00183 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname) 00184 { 00185 //if (log_.call()) log_.call(ME, string("endElement")); 00186 handler_->endElement(getStringValue(qname)); 00187 } 00188 00190 void Sax2Parser::startDocument() 00191 { 00192 //if (log_.call()) log_.call(ME, string("startDocument")); 00193 handler_->startDocument(); 00194 } 00195 00197 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs) 00198 { 00199 //if (log_.call()) log_.call(ME, "startElement <" + name + ">"); 00200 AttributeMap tmpMap; 00201 handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs)); 00202 } 00203 00205 void Sax2Parser::endCDATA() 00206 { 00207 //if (log_.call()) log_.call(ME, string("endCDATA")); 00208 handler_->endCDATA(); 00209 } 00210 00212 void Sax2Parser::startCDATA() 00213 { 00214 //if (log_.call()) log_.call(ME, string("startCDATA")); 00215 handler_->startCDATA(); 00216 } 00217 00219 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length) 00220 { 00221 //if (log_.call()) log_.call(ME, string("characters")); 00222 string tmp; 00223 bool doTrim = false; 00224 tmp.assign(getStringValue(chars, doTrim), 0, length); 00225 handler_->characters(tmp); 00226 } 00227 00228 // 00229 // ErrorHandler methods 00230 // 00231 00233 void Sax2Parser::warning(const SAXParseException &ex) 00234 { 00235 if (log_.call()) log_.call(ME, string("warning")); 00236 string txt = getLocationString(ex) + "\n"; 00237 handler_->warning(txt); 00238 } 00239 00240 00242 void Sax2Parser::error(const SAXParseException &ex) 00243 { 00244 if (log_.call()) log_.call(ME, string("error")); 00245 string txt = getLocationString(ex) + "\n"; 00246 handler_->error(txt); 00247 } 00248 00249 00251 void Sax2Parser::fatalError(const SAXParseException &ex) 00252 { 00253 if (log_.call()) log_.call(ME, string("fatalError")); 00254 string txt = getLocationString(ex) + "\n"; 00255 handler_->fatalError(txt); 00256 } 00257 00258 00260 string Sax2Parser::getLocationString(const SAXParseException &ex) 00261 { 00262 string systemId = getStringValue(ex.getSystemId()); 00263 string str; 00264 if (systemId != "") { 00265 string::size_type index = systemId.find_last_of('/'); 00266 if (index != string::npos) systemId = systemId.substr(index + 1); 00267 str = systemId + ":"; 00268 } 00269 string message = Sax2Parser::getStringValue(ex.getMessage(), true); 00270 return str + "line=" + lexical_cast<std::string>(ex.getLineNumber()) 00271 + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message; 00272 } 00273 00274 00282 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2) 00283 { 00284 XMLCh* name1Helper = XMLString::replicate(name1); 00285 XMLString::upperCase(name1Helper); 00286 XMLCh* name2Helper = XMLString::transcode(name2); 00287 XMLString::upperCase(name2Helper); 00288 bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0); 00289 Sax2Parser::releaseXMLCh(&name1Helper); 00290 Sax2Parser::releaseXMLCh(&name2Helper); 00291 return ret; 00292 } 00293 00294 00298 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const 00299 { 00300 /* Works only with US-ASCII: 00301 char* help = 0; 00302 try { 00303 string ret; 00304 help = XMLString::transcode(value); 00305 if (help != 0) { 00306 if (doTrim) ret = StringTrim::trim(help); 00307 else ret = string(help); 00308 Sax2Parser::releaseXMLCh(&help); 00309 } 00310 } 00311 catch (...) { 00312 if (help != 0) 00313 Sax2Parser::releaseXMLCh(&help); 00314 cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl; 00315 // throw; 00316 } 00317 */ 00318 if (value == NULL) { 00319 return ""; 00320 } 00321 00322 /* 00323 Converts from the encoding of the service to the internal XMLCh* encoding. 00324 unsigned int 00325 XMLUTF8Transcoder::transcodeFrom(const XMLByte* const srcData 00326 , const unsigned int srcCount 00327 , XMLCh* const toFill 00328 , const unsigned int maxChars 00329 , unsigned int& bytesEaten 00330 , unsigned char* const charSizes) 00331 */ 00332 /* 00333 Converts from the internal XMLCh* encoding to the encoding of the service. 00334 Parameters: 00335 srcData the source buffer to be transcoded 00336 srcCount number of characters in the source buffer 00337 toFill the destination buffer 00338 maxBytes the max number of bytes in the destination buffer 00339 charsEaten after transcoding, this will hold the number of chars that were processed from the source buffer 00340 options options to pass to the transcoder that explain how to respond to an unrepresentable character 00341 00342 Returns: 00343 Returns the number of chars put into the target buffer 00344 unsigned int 00345 XMLUTF8Transcoder::transcodeTo( const XMLCh* const srcData 00346 , const unsigned int srcCount 00347 , XMLByte* const toFill 00348 , const unsigned int maxBytes 00349 , unsigned int& charsEaten 00350 , const UnRepOpts options) 00351 00352 */ 00353 00354 unsigned int charsEatenFromSource = 0; 00355 unsigned int counter = 0; 00356 string result; 00357 unsigned int charsToRead = XMLString::stringLen(value); 00358 do { 00359 char resultXMLString_Encoded[ENCODERBUFFERSIZE+4]; 00360 *resultXMLString_Encoded = 0; 00361 charsEatenFromSource = 0; 00362 int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter, 00363 XMLString::stringLen(value)-counter, 00364 (XMLByte*) resultXMLString_Encoded, 00365 ENCODERBUFFERSIZE, 00366 charsEatenFromSource, 00367 XMLTranscoder::UnRep_Throw ); 00368 00369 /* 00370 log_.info(ME,"TRANSCODE TMP: got '" + result + 00371 "' charsToRead= " + lexical_cast<string>(charsToRead) + 00372 "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + 00373 " charsEaten=" + lexical_cast<string>(charsEatenFromSource) + 00374 " counter=" + lexical_cast<string>(counter) + 00375 " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget)); 00376 */ 00377 if (charsEatenFromSource < 1) 00378 break; 00379 result += string(resultXMLString_Encoded, charsPutToTarget); 00380 counter += charsEatenFromSource; 00381 } 00382 while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE); 00383 00384 //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource)); 00385 00386 if (doTrim) StringTrim::trim(result); 00387 00388 return result; 00389 } 00390 00391 00392 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs) 00393 { 00394 int len = attrs.getLength(); 00395 for (int i = 0; i < len; i++) { 00396 attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i)); 00397 } 00398 return attrMap; 00399 } 00400 00407 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const 00408 { 00409 const XMLCh* tmp = attrs.getValue(name); 00410 if (!tmp) return false; 00411 00412 char* help1 = NULL; 00413 try { 00414 help1 = XMLString::transcode(tmp); 00415 if (!help1) return false; 00416 if (doTrim) { 00417 value.assign(StringTrim::trim(help1)); 00418 } 00419 else value.assign(help1); 00420 } 00421 catch (...) {} 00422 Sax2Parser::releaseXMLCh(&help1); 00423 return true; 00424 } 00425 00426 00427 void Sax2Parser::releaseXMLCh(XMLCh** data) 00428 { 00429 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1 00430 XMLString::release(data); 00431 #else 00432 delete [] *data; 00433 *data = 0; 00434 #endif 00435 } 00436 00437 void Sax2Parser::releaseXMLCh(char** data) 00438 { 00439 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1 00440 XMLString::release(data); 00441 #else 00442 delete [] *data; 00443 *data = 0; 00444 #endif 00445 } 00446 00447 #endif // XMLBLASTER_XERCES_PLUGIN 00448 00449 }}}} // namespace 00450 #endif // _UTIL_PARSER_SAX2XERCESPARSER_C 00451