1 /*-----------------------------------------------------------------------------
  2 Name:      Sax2XercesParser.cpp
  3 Project:   xmlBlaster.org
  4 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file
  5 Comment:   Default handling of Sax callbacks
  6 -----------------------------------------------------------------------------*/
  7 
  8 #ifndef _UTIL_PARSER_SAX2XERCESPARSER_C
  9 #define _UTIL_PARSER_SAX2XERCESPARSER_C
 10 
 11 #if defined(XMLBLASTER_MSXML_PLUGIN)
 12 #  error Implement Microsoft XML parser for /DXMLBLASTER_MSXML_PLUGIN
 13 #else  // XMLBLASTER_XERCES_PLUGIN
 14 
 15 #if defined(_WIN32)
 16   #pragma warning(disable:4786)
 17 #endif
 18 
#include <util/parser/Sax2XercesParser.h>
#include <xercesc/sax/SAXException.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/XercesVersion.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <util/XmlBlasterException.h>
#include <util/Global.h>
#include <util/lexical_cast.h>
#include <iostream>
#include <vector>
 30 //#include <cstdlib> //<stdlib.h>
 31 
 32 namespace org { namespace xmlBlaster { namespace util { namespace parser {
 33 
 34 using namespace std;
 35 
 36 static const int ENCODERBUFFERSIZE = 16*1024;
 37 
 38 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) : 
 39     I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")),
 40     xmlBlasterTranscoder_(0)
 41 {
 42    if (log_.call()) log_.trace(ME, "Creating new Sax2Parser");
 43 
 44    //"UTF-8" is currently not supported with our std::string usage!
 45    encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1");
 46 
 47    XMLTransService::Codes resCode;
 48    xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE);
 49    if (resCode != 0/*XMLTransService::Codes::Ok*/) {
 50       log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) +
 51                      ". Please check your SAX parser setting '-xmlBlaster/encoding'");
 52       throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode));
 53    }
 54    else {
 55       if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_);
 56    }
 57 }
 58 
 59 Sax2Parser::~Sax2Parser()
 60 {
 61    delete xmlBlasterTranscoder_;
 62 }
 63 
 64 std::string Sax2Parser::usage() 
 65 {
 66    std::string text = string("");
 67    //text += string("\n");
 68    text += string("\nThe xerces SAX XML parser plugin configuration:");
 69    text += string("\n   -xmlBlaster/encoding [iso-8859-1]");
 70    text += string("\n                       The parser encoding to use for xmlBlaster specific QoS and key SAX parsing");
 71    text += string("\n");
 72    return text;
 73 }
 74 
 75 void Sax2Parser::init(const string &xmlLiteral) 
 76 {
 77    if (xmlLiteral.size() > 0) {
 78       parse(xmlLiteral);
 79    }
 80 }
 81       
 82 /**
 83  * Does the actual parsing
 84  * @param xmlData Quality of service in XML notation
 85  */
 86 void Sax2Parser::parse(const string &xmlData) 
 87 {
 88    //if (log_.call()) log_.call(ME, "parse");
 89    //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'"));
 90  
 91    SAX2XMLReader *parser = NULL;
 92    //XMLCh* encodingHelper = NULL;
 93    try {
 94       parser = XMLReaderFactory::createXMLReader();
 95       parser->setContentHandler(this);
 96       parser->setErrorHandler(this);
 97       parser->setLexicalHandler(this);
 98 
 99       // "UTF-8"  "iso-8859-1"
100       //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData;
101       const string &xmlData1 = xmlData;
102       //log_.info(ME, "Parsing now: " + xmlData1);
103       
104       MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false);
105       
106       XMLCh tempStr[100];
107       XMLString::transcode(encoding_.c_str(), tempStr, 99);
108       inSource.setEncoding(tempStr);
109       //encodingHelper = XMLString::transcode(encoding.c_str());
110       //inSource.setEncoding(encodingHelper);
111       //Sax2Parser::releaseXMLCh(&encodingHelper);
112 
113       parser->parse(inSource);
114       delete parser;
115    }
116    catch (StopParseException&) {
117       // If it does not work, it could be wrapped into SAXParseException
118       log_.error(ME, string("StopParseException: ") +
119                               "Parsing execution stopped half the way ");
120       if (log_.trace()) {
121          string help = XmlBlasterException::getStackTrace();
122          log_.plain(ME, help);
123       }
124       delete parser; // just in case it did not 
125       return;
126    }
127    catch (XmlBlasterException& ex) {
128       throw ex;
129    }
130    catch (SAXParseException &err) {
131       string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage());
132       delete parser;
133       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc);
134    }
135    catch (SAXNotRecognizedException &err) {
136       string msg = getStringValue(err.getMessage());
137       delete parser;
138       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg);
139         }
140    catch (SAXNotSupportedException &err) {
141       string msg = getStringValue(err.getMessage());
142       delete parser;
143       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg);
144         }
145    catch (const XMLException &err) {
146       string msg = getStringValue(err.getMessage());
147       delete parser;
148       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg);
149    }
150    catch (SAXException &err) {
151       string msg = getStringValue(err.getMessage());
152       delete parser;
153       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg);
154    }
155    catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ...
156       string msg = err.what() + string(": ") + xmlData;
157       delete parser;
158       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg);
159    }
160    catch (const string& err) {
161      string msg = err;
162      delete parser;
163      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData);
164    }
165    catch (const char* err) {
166      string msg = err;
167      delete parser;
168      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData);
169    }
170    catch (...) {
171      delete parser;
172      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData);
173    }
174 }
175 
176 /** Receive notification of the end of the document. */
177 void Sax2Parser::endDocument() 
178 {
179    if (log_.call()) log_.call(ME, string("endDocument"));
180    handler_->endDocument();
181 }
182 
183 /** Receive notification of the end of an element. */
184 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname)
185 {
186    //if (log_.call()) log_.call(ME, string("endElement"));
187    handler_->endElement(getStringValue(qname));
188 }
189 
190 /** Receive notification of the beginning of the document. */
191 void Sax2Parser::startDocument()
192 {
193    //if (log_.call()) log_.call(ME, string("startDocument"));
194    handler_->startDocument();
195 }
196 
197 /** Receive notification of the start of an element. */
198 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs)
199 {
200    AttributeMap tmpMap;
201    handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs));
202 }
203 
204 /** Receive notification of the end of a CDATA section. */
205 void Sax2Parser::endCDATA()
206 {
207    //if (log_.call()) log_.call(ME, string("endCDATA"));
208    handler_->endCDATA();
209 }
210 
211 /** Receive notification of the start of a CDATA section. */
212 void Sax2Parser::startCDATA()
213 {
214    //if (log_.call()) log_.call(ME, string("startCDATA"));
215    handler_->startCDATA();
216 }
217 
218 /** Receive notification of character data inside an element. */
219 #if _XERCES_VERSION >= 30000
220 void Sax2Parser::characters (const XMLCh *const chars, const XMLSize_t length) // xerces 3
221 {
222    //if (log_.call()) log_.call(ME, string("characters"));
223    string tmp;
224    bool doTrim = false;
225    tmp.assign(getStringValue(chars, doTrim), 0, length);
226    handler_->characters(tmp);
227 }
228 #else
229 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length) // xerces 2
230 {
231    //if (log_.call()) log_.call(ME, string("characters"));
232    string tmp;
233    bool doTrim = false;
234    tmp.assign(getStringValue(chars, doTrim), 0, length);
235    handler_->characters(tmp);
236 }
237 #endif
238 
239 //
240 // ErrorHandler methods
241 //
242 
243 /** Warning. */
244 void Sax2Parser::warning(const SAXParseException &ex) 
245 {
246    if (log_.call()) log_.call(ME, string("warning"));
247    string txt = getLocationString(ex) + "\n";
248    handler_->warning(txt);
249 }
250       
251       
252 /** Error. */
253 void Sax2Parser::error(const SAXParseException &ex) 
254 {
255    if (log_.call()) log_.call(ME, string("error"));
256    string txt = getLocationString(ex) + "\n";
257    handler_->error(txt);
258 }
259 
260 
261 /** Fatal error. */
262 void Sax2Parser::fatalError(const SAXParseException &ex) 
263 {
264    if (log_.call()) log_.call(ME, string("fatalError"));
265    string txt = getLocationString(ex) + "\n";
266    handler_->fatalError(txt);
267 }
268 
269 
270 /** Returns a string of the location. */
271 string Sax2Parser::getLocationString(const SAXParseException &ex) 
272 {
273   string systemId = getStringValue(ex.getSystemId());
274   string str;
275   if (systemId != "") {
276     string::size_type index = systemId.find_last_of('/');
277     if (index != string::npos) systemId = systemId.substr(index + 1);
278     str = systemId + ":";
279   }
280   string message = Sax2Parser::getStringValue(ex.getMessage(), true);
281   return str + "line=" + lexical_cast<std::string>(ex.getLineNumber()) 
282       + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message;
283 }
284 
285 
286 /**
287  * Compares two strings (where name1 is a Unicode3.0 string!!) for 
288  * unsensitive case compare. It returns true if the content of the
289  * strings is equal (no matter what the case is). Using this method to
290  * compare the strings should be portable to all platforms supported by
291  * xerces.
292  */
293 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2) 
294 {
295   XMLCh* name1Helper = XMLString::replicate(name1);
296   XMLString::upperCase(name1Helper);
297   XMLCh* name2Helper = XMLString::transcode(name2);
298   XMLString::upperCase(name2Helper);
299   bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0);
300   Sax2Parser::releaseXMLCh(&name1Helper);
301   Sax2Parser::releaseXMLCh(&name2Helper);
302   return ret;
303 }
304 
305 
306 /**
307  * returns a trimmed value (usually from an attribute) as a string
308  */
309 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const
310 {
311    /* Works only with US-ASCII:
312    char* help = 0;
313    try {
314       string ret;
315       help = XMLString::transcode(value);
316       if (help != 0) {
317          if (doTrim) ret = StringTrim::trim(help);
318          else ret = string(help);
319          Sax2Parser::releaseXMLCh(&help);
320       }
321    }
322    catch (...) {
323       if (help != 0)
324          Sax2Parser::releaseXMLCh(&help);
325       cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl;
326       // throw;
327    }
328    */
329    if (value == NULL) {
330       return "";
331    }
332 
333 /*
334 Converts from the encoding of the service to the internal XMLCh* encoding.
335 unsigned int
336 XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
337                                 , const unsigned int            srcCount
338                                 ,       XMLCh* const            toFill
339                                 , const unsigned int            maxChars
340                                 ,       unsigned int&           bytesEaten
341                                 ,       unsigned char* const    charSizes)
342 */
343 /*
344 Converts from the internal XMLCh* encoding to the encoding of the service.
345 Parameters:
346     srcData     the source buffer to be transcoded
347     srcCount    number of characters in the source buffer
348     toFill      the destination buffer
349     maxBytes    the max number of bytes in the destination buffer
350     charsEaten  after transcoding, this will hold the number of chars that were processed from the source buffer
351     options     options to pass to the transcoder that explain how to respond to an unrepresentable character
352 
353 Returns:
354     Returns the number of chars put into the target buffer 
355 unsigned int
356 XMLUTF8Transcoder::transcodeTo( const   XMLCh* const    srcData
357                                 , const unsigned int    srcCount
358                                 ,       XMLByte* const  toFill
359                                 , const unsigned int    maxBytes
360                                 ,       unsigned int&   charsEaten
361                                 , const UnRepOpts       options)
362 
363 */
364 
365    unsigned int charsEatenFromSource = 0;
366    unsigned int counter = 0;
367    string result;
368    unsigned int charsToRead = XMLString::stringLen(value);
369    do {
370       char resultXMLString_Encoded[ENCODERBUFFERSIZE+4];
371       *resultXMLString_Encoded = 0;
372       charsEatenFromSource = 0;
373      #if XERCES_VERSION_MAJOR >= 3
374       int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
375                                     XMLString::stringLen(value)-counter,
376                                     (XMLByte*) resultXMLString_Encoded,
377                                     (XMLSize_t) ENCODERBUFFERSIZE,
378                                     (XMLSize_t&) charsEatenFromSource,
379                                     XMLTranscoder::UnRep_Throw );
380      #else
381       int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
382                                     XMLString::stringLen(value)-counter,
383                                     (XMLByte*) resultXMLString_Encoded,
384                                     ENCODERBUFFERSIZE,
385                                     charsEatenFromSource,
386                                     XMLTranscoder::UnRep_Throw );   
387      #endif
388       /*
389       log_.info(ME,"TRANSCODE TMP: got '" + result +
390                    "' charsToRead= " + lexical_cast<string>(charsToRead) +
391                    "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) +
392                    " charsEaten=" + lexical_cast<string>(charsEatenFromSource) +
393                    " counter=" + lexical_cast<string>(counter) +
394                    " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget));
395       */
396       if (charsEatenFromSource < 1)
397          break;
398       result += string(resultXMLString_Encoded, charsPutToTarget);
399       counter += charsEatenFromSource;
400    }
401    while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE);
402 
403    //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource));
404 
405    if (doTrim) StringTrim::trim(result);
406 
407    return result;
408 }
409 
410 
411 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs)
412 {
413    int len = attrs.getLength();
414    for (int i = 0; i < len; i++) {
415       attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i));
416    }
417    return attrMap;
418 }
419 
420 /**
421  * gets the attribute specified by 'name' in the attribute list specified by 'list'. The result is put in 
422  * the 'value' argument which is passed by reference. It returns 'true' if the attribute was found in the
423  * specified attribute list or 'false' if it was not. In the later case, the value is untouched by this 
424  * method.
425  */
426 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const
427 {
428    const XMLCh* tmp = attrs.getValue(name);
429    if (!tmp) return false;
430 
431    char* help1 = NULL;
432    try {
433       help1 = XMLString::transcode(tmp);
434       if (!help1) return false;
435       if (doTrim) {
436          value.assign(StringTrim::trim(help1));
437       }
438       else value.assign(help1);
439    }
440    catch (...) {}
441    Sax2Parser::releaseXMLCh(&help1);
442    return true;
443 }
444 
445 
446 void Sax2Parser::releaseXMLCh(XMLCh** data)
447 {
448 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
449    XMLString::release(data);
450 #else
451    delete [] *data;
452    *data = 0;
453 #endif
454 }
455 
456 void Sax2Parser::releaseXMLCh(char** data)
457 {
458 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
459    XMLString::release(data);
460 #else
461    delete [] *data;
462    *data = 0;
463 #endif
464 }
465 
466 #endif  // XMLBLASTER_XERCES_PLUGIN
467 
468 }}}} // namespace
469 #endif // _UTIL_PARSER_SAX2XERCESPARSER_C


syntax highlighted by Code2HTML, v. 0.9.1