1 /*-----------------------------------------------------------------------------
  2 Name:      Sax2Parser.cpp
  3 Project:   xmlBlaster.org
  4 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file
  5 Comment:   Default handling of Sax callbacks
  6 -----------------------------------------------------------------------------*/
  7 
  8 #ifndef _UTIL_PARSER_SAX2PARSER_C
  9 #define _UTIL_PARSER_SAX2PARSER_C
 10 
 11 #if defined(_WIN32)
 12   #pragma warning(disable:4786)
 13 #endif
 14 
#include <util/parser/Sax2Parser.h>
#include <xercesc/sax/SAXException.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <util/XmlBlasterException.h>
#include <util/Global.h>
#include <util/lexical_cast.h>
#include <iostream>
#include <vector>
//#include <cstdlib> //<stdlib.h>
 26 
 27 namespace org { namespace xmlBlaster { namespace util { namespace parser {
 28 
 29 using namespace std;
 30 
 31 static const int ENCODERBUFFERSIZE = 16*1024;
 32 
 33 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) : 
 34     I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")),
 35     xmlBlasterTranscoder_(0)
 36 {
 37    if (log_.call()) log_.trace(ME, "Creating new Sax2Parser");
 38 
 39    //"UTF-8" is currently not supported with our std::string usage!
 40    encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1");
 41 
 42    XMLTransService::Codes resCode;
 43    xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE);
 44    if (resCode != 0/*XMLTransService::Codes::Ok*/) {
 45       log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) +
 46                      ". Please check your SAX parser setting '-xmlBlaster/encoding'");
 47       throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode));
 48    }
 49    else {
 50       if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_);
 51    }
 52 }
 53 
 54 Sax2Parser::~Sax2Parser()
 55 {
 56    delete xmlBlasterTranscoder_;
 57 }
 58 
 59 std::string Sax2Parser::usage() 
 60 {
 61    std::string text = string("");
 62    //text += string("\n");
 63    text += string("\nThe xerces SAX XML parser plugin configuration:");
 64    text += string("\n   -xmlBlaster/encoding [iso-8859-1]");
 65    text += string("\n                       The parser encoding to use for xmlBlaster specific QoS and key SAX parsing");
 66    text += string("\n");
 67    return text;
 68 }
 69 
 70 void Sax2Parser::init(const string &xmlLiteral) 
 71 {
 72    if (xmlLiteral.size() > 0) {
 73       parse(xmlLiteral);
 74    }
 75 }
 76       
 77 /**
 78  * Does the actual parsing
 79  * @param xmlData Quality of service in XML notation
 80  */
 81 void Sax2Parser::parse(const string &xmlData) 
 82 {
 83    //if (log_.call()) log_.call(ME, "parse");
 84    //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'"));
 85  
 86    SAX2XMLReader *parser = NULL;
 87    //XMLCh* encodingHelper = NULL;
 88    try {
 89       parser = XMLReaderFactory::createXMLReader();
 90       parser->setContentHandler(this);
 91       parser->setErrorHandler(this);
 92       parser->setLexicalHandler(this);
 93 
 94       // "UTF-8"  "iso-8859-1"
 95       //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData;
 96       const string &xmlData1 = xmlData;
 97       //log_.info(ME, "Parsing now: " + xmlData1);
 98       
 99       MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false);
100       
101       XMLCh tempStr[100];
102       XMLString::transcode(encoding_.c_str(), tempStr, 99);
103       inSource.setEncoding(tempStr);
104       //encodingHelper = XMLString::transcode(encoding.c_str());
105       //inSource.setEncoding(encodingHelper);
106       //Sax2Parser::releaseXMLCh(&encodingHelper);
107 
108       parser->parse(inSource);
109       delete parser;
110    }
111    catch (StopParseException&) {
112       // If it does not work, it could be wrapped into SAXParseException
113       log_.error(ME, string("StopParseException: ") +
114                               "Parsing execution stopped half the way ");
115       if (log_.trace()) {
116          string help = XmlBlasterException::getStackTrace();
117          log_.plain(ME, help);
118       }
119       delete parser; // just in case it did not 
120       return;
121    }
122    catch (XmlBlasterException& ex) {
123       throw ex;
124    }
125    catch (SAXParseException &err) {
126       string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage());
127       delete parser;
128       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc);
129    }
130    catch (SAXNotRecognizedException &err) {
131       string msg = getStringValue(err.getMessage());
132       delete parser;
133       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg);
134         }
135    catch (SAXNotSupportedException &err) {
136       string msg = getStringValue(err.getMessage());
137       delete parser;
138       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg);
139         }
140    catch (const XMLException &err) {
141       string msg = getStringValue(err.getMessage());
142       delete parser;
143       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg);
144    }
145    catch (SAXException &err) {
146       string msg = getStringValue(err.getMessage());
147       delete parser;
148       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg);
149    }
150    catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ...
151       string msg = err.what() + string(": ") + xmlData;
152       delete parser;
153       throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg);
154    }
155    catch (const string& err) {
156      string msg = err;
157      delete parser;
158      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData);
159    }
160    catch (const char* err) {
161      string msg = err;
162      delete parser;
163      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData);
164    }
165    catch (...) {
166      delete parser;
167      throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData);
168    }
169 }
170 
171 /** Receive notification of the end of the document. */
172 void Sax2Parser::endDocument() 
173 {
174    if (log_.call()) log_.call(ME, string("endDocument"));
175    handler_->endDocument();
176 }
177 
178 /** Receive notification of the end of an element. */
179 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname)
180 {
181    //if (log_.call()) log_.call(ME, string("endElement"));
182    handler_->endElement(getStringValue(qname));
183 }
184 
185 /** Receive notification of the beginning of the document. */
186 void Sax2Parser::startDocument()
187 {
188    //if (log_.call()) log_.call(ME, string("startDocument"));
189    handler_->startDocument();
190 }
191 
192 /** Receive notification of the start of an element. */
193 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs)
194 {
195    //if (log_.call()) log_.call(ME, "startElement <" + name + ">");
196    AttributeMap tmpMap;
197    handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs));
198 }
199 
200 /** Receive notification of the end of a CDATA section. */
201 void Sax2Parser::endCDATA()
202 {
203    //if (log_.call()) log_.call(ME, string("endCDATA"));
204    handler_->endCDATA();
205 }
206 
207 /** Receive notification of the start of a CDATA section. */
208 void Sax2Parser::startCDATA()
209 {
210    //if (log_.call()) log_.call(ME, string("startCDATA"));
211    handler_->startCDATA();
212 }
213 
214 /** Receive notification of character data inside an element. */
215 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length)
216 {
217    //if (log_.call()) log_.call(ME, string("characters"));
218    string tmp;
219    bool doTrim = false;
220    tmp.assign(getStringValue(chars, doTrim), 0, length);
221    handler_->characters(tmp);
222 }
223 
224 //
225 // ErrorHandler methods
226 //
227 
228 /** Warning. */
229 void Sax2Parser::warning(const SAXParseException &ex) 
230 {
231    if (log_.call()) log_.call(ME, string("warning"));
232    string txt = getLocationString(ex) + "\n";
233    handler_->warning(txt);
234 }
235       
236       
237 /** Error. */
238 void Sax2Parser::error(const SAXParseException &ex) 
239 {
240    if (log_.call()) log_.call(ME, string("error"));
241    string txt = getLocationString(ex) + "\n";
242    handler_->error(txt);
243 }
244 
245 
246 /** Fatal error. */
247 void Sax2Parser::fatalError(const SAXParseException &ex) 
248 {
249    if (log_.call()) log_.call(ME, string("fatalError"));
250    string txt = getLocationString(ex) + "\n";
251    handler_->fatalError(txt);
252 }
253 
254 
255 /** Returns a string of the location. */
256 string Sax2Parser::getLocationString(const SAXParseException &ex) 
257 {
258   string systemId = getStringValue(ex.getSystemId());
259   string str;
260   if (systemId != "") {
261     string::size_type index = systemId.find_last_of('/');
262     if (index != string::npos) systemId = systemId.substr(index + 1);
263     str = systemId + ":";
264   }
265   string message = Sax2Parser::getStringValue(ex.getMessage(), true);
266   return str + "line=" + lexical_cast<std::string>(ex.getLineNumber()) 
267       + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message;
268 }
269 
270 
271 /**
272  * Compares two strings (where name1 is a Unicode3.0 string!!) for 
273  * unsensitive case compare. It returns true if the content of the
274  * strings is equal (no matter what the case is). Using this method to
275  * compare the strings should be portable to all platforms supported by
276  * xerces.
277  */
278 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2) 
279 {
280   XMLCh* name1Helper = XMLString::replicate(name1);
281   XMLString::upperCase(name1Helper);
282   XMLCh* name2Helper = XMLString::transcode(name2);
283   XMLString::upperCase(name2Helper);
284   bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0);
285   Sax2Parser::releaseXMLCh(&name1Helper);
286   Sax2Parser::releaseXMLCh(&name2Helper);
287   return ret;
288 }
289 
290 
291 /**
292  * returns a trimmed value (usually from an attribute) as a string
293  */
294 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const
295 {
296    /* Works only with US-ASCII:
297    char* help = 0;
298    try {
299       string ret;
300       help = XMLString::transcode(value);
301       if (help != 0) {
302          if (doTrim) ret = StringTrim::trim(help);
303          else ret = string(help);
304          Sax2Parser::releaseXMLCh(&help);
305       }
306    }
307    catch (...) {
308       if (help != 0)
309          Sax2Parser::releaseXMLCh(&help);
310       cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl;
311       // throw;
312    }
313    */
314    if (value == NULL) {
315       return "";
316    }
317 
318 /*
319 Converts from the encoding of the service to the internal XMLCh* encoding.
320 unsigned int
321 XMLUTF8Transcoder::transcodeFrom(const  XMLByte* const          srcData
322                                 , const unsigned int            srcCount
323                                 ,       XMLCh* const            toFill
324                                 , const unsigned int            maxChars
325                                 ,       unsigned int&           bytesEaten
326                                 ,       unsigned char* const    charSizes)
327 */
328 /*
329 Converts from the internal XMLCh* encoding to the encoding of the service.
330 Parameters:
331     srcData     the source buffer to be transcoded
332     srcCount    number of characters in the source buffer
333     toFill      the destination buffer
334     maxBytes    the max number of bytes in the destination buffer
335     charsEaten  after transcoding, this will hold the number of chars that were processed from the source buffer
336     options     options to pass to the transcoder that explain how to respond to an unrepresentable character
337 
338 Returns:
339     Returns the number of chars put into the target buffer 
340 unsigned int
341 XMLUTF8Transcoder::transcodeTo( const   XMLCh* const    srcData
342                                 , const unsigned int    srcCount
343                                 ,       XMLByte* const  toFill
344                                 , const unsigned int    maxBytes
345                                 ,       unsigned int&   charsEaten
346                                 , const UnRepOpts       options)
347 
348 */
349 
350    unsigned int charsEatenFromSource = 0;
351    unsigned int counter = 0;
352    string result;
353    unsigned int charsToRead = XMLString::stringLen(value);
354    do {
355       char resultXMLString_Encoded[ENCODERBUFFERSIZE+4];
356       *resultXMLString_Encoded = 0;
357       charsEatenFromSource = 0;
358       int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
359                                     XMLString::stringLen(value)-counter,
360                                     (XMLByte*) resultXMLString_Encoded,
361                                     ENCODERBUFFERSIZE,
362                                     charsEatenFromSource,
363                                     XMLTranscoder::UnRep_Throw );
364 
365       /*
366       log_.info(ME,"TRANSCODE TMP: got '" + result +
367                    "' charsToRead= " + lexical_cast<string>(charsToRead) +
368                    "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) +
369                    " charsEaten=" + lexical_cast<string>(charsEatenFromSource) +
370                    " counter=" + lexical_cast<string>(counter) +
371                    " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget));
372       */
373       if (charsEatenFromSource < 1)
374          break;
375       result += string(resultXMLString_Encoded, charsPutToTarget);
376       counter += charsEatenFromSource;
377    }
378    while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE);
379 
380    //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource));
381 
382    if (doTrim) StringTrim::trim(result);
383 
384    return result;
385 }
386 
387 
388 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs)
389 {
390    int len = attrs.getLength();
391    for (int i = 0; i < len; i++) {
392       attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i));
393    }
394    return attrMap;
395 }
396 
397 /**
398  * gets the attribute specified by 'name' in the attribute list specified by 'list'. The result is put in 
399  * the 'value' argument which is passed by reference. It returns 'true' if the attribute was found in the
400  * specified attribute list or 'false' if it was not. In the later case, the value is untouched by this 
401  * method.
402  */
403 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const
404 {
405    const XMLCh* tmp = attrs.getValue(name);
406    if (!tmp) return false;
407 
408    char* help1 = NULL;
409    try {
410       help1 = XMLString::transcode(tmp);
411       if (!help1) return false;
412       if (doTrim) {
413          value.assign(StringTrim::trim(help1));
414       }
415       else value.assign(help1);
416    }
417    catch (...) {}
418    Sax2Parser::releaseXMLCh(&help1);
419    return true;
420 }
421 
422 
423 void Sax2Parser::releaseXMLCh(XMLCh** data)
424 {
425 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
426    XMLString::release(data);
427 #else
428    delete [] *data;
429    *data = 0;
430 #endif
431 }
432 
433 void Sax2Parser::releaseXMLCh(char** data)
434 {
435 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
436    XMLString::release(data);
437 #else
438    delete [] *data;
439    *data = 0;
440 #endif
441 }
442 
443 #endif
444 
445 }}}} // namespace


// syntax highlighted by Code2HTML, v. 0.9.1