1 /*-----------------------------------------------------------------------------
2 Name: Sax2XercesParser.cpp
3 Project: xmlBlaster.org
4 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file
5 Comment: Default handling of Sax callbacks
6 -----------------------------------------------------------------------------*/
7
8 #ifndef _UTIL_PARSER_SAX2XERCESPARSER_C
9 #define _UTIL_PARSER_SAX2XERCESPARSER_C
10
11 #if defined(XMLBLASTER_MSXML_PLUGIN)
12 # error Implement Microsoft XML parser for /DXMLBLASTER_MSXML_PLUGIN
13 #else // XMLBLASTER_XERCES_PLUGIN
14
15 #if defined(_WIN32)
16 #pragma warning(disable:4786)
17 #endif
18
19 #include <util/parser/Sax2XercesParser.h>
20 #include <xercesc/sax/SAXException.hpp>
21 #include <xercesc/sax2/SAX2XMLReader.hpp>
22 #include <xercesc/sax2/XMLReaderFactory.hpp>
23 #include <xercesc/util/PlatformUtils.hpp>
24 #include <xercesc/util/XercesVersion.hpp>
25 #include <xercesc/framework/MemBufInputSource.hpp>
26 #include <util/XmlBlasterException.h>
27 #include <util/Global.h>
28 #include <util/lexical_cast.h>
29 #include <iostream>
30 //#include <cstdlib> //<stdlib.h>
31
32 namespace org { namespace xmlBlaster { namespace util { namespace parser {
33
34 using namespace std;
35
/**
 * Size in bytes (16 KiB) used both as block size when creating the XMLTranscoder
 * and as capacity of the scratch buffer filled by each transcodeTo() call.
 */
static const int ENCODERBUFFERSIZE = 16 * 1024;
37
38 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) :
39 I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")),
40 xmlBlasterTranscoder_(0)
41 {
42 if (log_.call()) log_.trace(ME, "Creating new Sax2Parser");
43
44 //"UTF-8" is currently not supported with our std::string usage!
45 encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1");
46
47 XMLTransService::Codes resCode;
48 xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE);
49 if (resCode != 0/*XMLTransService::Codes::Ok*/) {
50 log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) +
51 ". Please check your SAX parser setting '-xmlBlaster/encoding'");
52 throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode));
53 }
54 else {
55 if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_);
56 }
57 }
58
59 Sax2Parser::~Sax2Parser()
60 {
61 delete xmlBlasterTranscoder_;
62 }
63
64 std::string Sax2Parser::usage()
65 {
66 std::string text = string("");
67 //text += string("\n");
68 text += string("\nThe xerces SAX XML parser plugin configuration:");
69 text += string("\n -xmlBlaster/encoding [iso-8859-1]");
70 text += string("\n The parser encoding to use for xmlBlaster specific QoS and key SAX parsing");
71 text += string("\n");
72 return text;
73 }
74
75 void Sax2Parser::init(const string &xmlLiteral)
76 {
77 if (xmlLiteral.size() > 0) {
78 parse(xmlLiteral);
79 }
80 }
81
82 /**
83 * Does the actual parsing
84 * @param xmlData Quality of service in XML notation
85 */
86 void Sax2Parser::parse(const string &xmlData)
87 {
88 //if (log_.call()) log_.call(ME, "parse");
89 //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'"));
90
91 SAX2XMLReader *parser = NULL;
92 //XMLCh* encodingHelper = NULL;
93 try {
94 parser = XMLReaderFactory::createXMLReader();
95 parser->setContentHandler(this);
96 parser->setErrorHandler(this);
97 parser->setLexicalHandler(this);
98
99 // "UTF-8" "iso-8859-1"
100 //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData;
101 const string &xmlData1 = xmlData;
102 //log_.info(ME, "Parsing now: " + xmlData1);
103
104 MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false);
105
106 XMLCh tempStr[100];
107 XMLString::transcode(encoding_.c_str(), tempStr, 99);
108 inSource.setEncoding(tempStr);
109 //encodingHelper = XMLString::transcode(encoding.c_str());
110 //inSource.setEncoding(encodingHelper);
111 //Sax2Parser::releaseXMLCh(&encodingHelper);
112
113 parser->parse(inSource);
114 delete parser;
115 }
116 catch (StopParseException&) {
117 // If it does not work, it could be wrapped into SAXParseException
118 log_.error(ME, string("StopParseException: ") +
119 "Parsing execution stopped half the way ");
120 if (log_.trace()) {
121 string help = XmlBlasterException::getStackTrace();
122 log_.plain(ME, help);
123 }
124 delete parser; // just in case it did not
125 return;
126 }
127 catch (XmlBlasterException& ex) {
128 throw ex;
129 }
130 catch (SAXParseException &err) {
131 string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage());
132 delete parser;
133 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc);
134 }
135 catch (SAXNotRecognizedException &err) {
136 string msg = getStringValue(err.getMessage());
137 delete parser;
138 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg);
139 }
140 catch (SAXNotSupportedException &err) {
141 string msg = getStringValue(err.getMessage());
142 delete parser;
143 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg);
144 }
145 catch (const XMLException &err) {
146 string msg = getStringValue(err.getMessage());
147 delete parser;
148 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg);
149 }
150 catch (SAXException &err) {
151 string msg = getStringValue(err.getMessage());
152 delete parser;
153 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg);
154 }
155 catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ...
156 string msg = err.what() + string(": ") + xmlData;
157 delete parser;
158 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg);
159 }
160 catch (const string& err) {
161 string msg = err;
162 delete parser;
163 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData);
164 }
165 catch (const char* err) {
166 string msg = err;
167 delete parser;
168 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData);
169 }
170 catch (...) {
171 delete parser;
172 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData);
173 }
174 }
175
176 /** Receive notification of the end of the document. */
177 void Sax2Parser::endDocument()
178 {
179 if (log_.call()) log_.call(ME, string("endDocument"));
180 handler_->endDocument();
181 }
182
183 /** Receive notification of the end of an element. */
184 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname)
185 {
186 //if (log_.call()) log_.call(ME, string("endElement"));
187 handler_->endElement(getStringValue(qname));
188 }
189
190 /** Receive notification of the beginning of the document. */
191 void Sax2Parser::startDocument()
192 {
193 //if (log_.call()) log_.call(ME, string("startDocument"));
194 handler_->startDocument();
195 }
196
197 /** Receive notification of the start of an element. */
198 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs)
199 {
200 AttributeMap tmpMap;
201 handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs));
202 }
203
204 /** Receive notification of the end of a CDATA section. */
205 void Sax2Parser::endCDATA()
206 {
207 //if (log_.call()) log_.call(ME, string("endCDATA"));
208 handler_->endCDATA();
209 }
210
211 /** Receive notification of the start of a CDATA section. */
212 void Sax2Parser::startCDATA()
213 {
214 //if (log_.call()) log_.call(ME, string("startCDATA"));
215 handler_->startCDATA();
216 }
217
218 /** Receive notification of character data inside an element. */
219 #if _XERCES_VERSION >= 30000
220 void Sax2Parser::characters (const XMLCh *const chars, const XMLSize_t length) // xerces 3
221 {
222 //if (log_.call()) log_.call(ME, string("characters"));
223 string tmp;
224 bool doTrim = false;
225 tmp.assign(getStringValue(chars, doTrim), 0, length);
226 handler_->characters(tmp);
227 }
228 #else
229 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length) // xerces 2
230 {
231 //if (log_.call()) log_.call(ME, string("characters"));
232 string tmp;
233 bool doTrim = false;
234 tmp.assign(getStringValue(chars, doTrim), 0, length);
235 handler_->characters(tmp);
236 }
237 #endif
238
239 //
240 // ErrorHandler methods
241 //
242
243 /** Warning. */
244 void Sax2Parser::warning(const SAXParseException &ex)
245 {
246 if (log_.call()) log_.call(ME, string("warning"));
247 string txt = getLocationString(ex) + "\n";
248 handler_->warning(txt);
249 }
250
251
252 /** Error. */
253 void Sax2Parser::error(const SAXParseException &ex)
254 {
255 if (log_.call()) log_.call(ME, string("error"));
256 string txt = getLocationString(ex) + "\n";
257 handler_->error(txt);
258 }
259
260
261 /** Fatal error. */
262 void Sax2Parser::fatalError(const SAXParseException &ex)
263 {
264 if (log_.call()) log_.call(ME, string("fatalError"));
265 string txt = getLocationString(ex) + "\n";
266 handler_->fatalError(txt);
267 }
268
269
270 /** Returns a string of the location. */
271 string Sax2Parser::getLocationString(const SAXParseException &ex)
272 {
273 string systemId = getStringValue(ex.getSystemId());
274 string str;
275 if (systemId != "") {
276 string::size_type index = systemId.find_last_of('/');
277 if (index != string::npos) systemId = systemId.substr(index + 1);
278 str = systemId + ":";
279 }
280 string message = Sax2Parser::getStringValue(ex.getMessage(), true);
281 return str + "line=" + lexical_cast<std::string>(ex.getLineNumber())
282 + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message;
283 }
284
285
286 /**
287 * Compares two strings (where name1 is a Unicode3.0 string!!) for
288 * unsensitive case compare. It returns true if the content of the
289 * strings is equal (no matter what the case is). Using this method to
290 * compare the strings should be portable to all platforms supported by
291 * xerces.
292 */
293 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2)
294 {
295 XMLCh* name1Helper = XMLString::replicate(name1);
296 XMLString::upperCase(name1Helper);
297 XMLCh* name2Helper = XMLString::transcode(name2);
298 XMLString::upperCase(name2Helper);
299 bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0);
300 Sax2Parser::releaseXMLCh(&name1Helper);
301 Sax2Parser::releaseXMLCh(&name2Helper);
302 return ret;
303 }
304
305
306 /**
307 * returns a trimmed value (usually from an attribute) as a string
308 */
309 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const
310 {
311 /* Works only with US-ASCII:
312 char* help = 0;
313 try {
314 string ret;
315 help = XMLString::transcode(value);
316 if (help != 0) {
317 if (doTrim) ret = StringTrim::trim(help);
318 else ret = string(help);
319 Sax2Parser::releaseXMLCh(&help);
320 }
321 }
322 catch (...) {
323 if (help != 0)
324 Sax2Parser::releaseXMLCh(&help);
325 cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl;
326 // throw;
327 }
328 */
329 if (value == NULL) {
330 return "";
331 }
332
333 /*
334 Converts from the encoding of the service to the internal XMLCh* encoding.
335 unsigned int
336 XMLUTF8Transcoder::transcodeFrom(const XMLByte* const srcData
337 , const unsigned int srcCount
338 , XMLCh* const toFill
339 , const unsigned int maxChars
340 , unsigned int& bytesEaten
341 , unsigned char* const charSizes)
342 */
343 /*
344 Converts from the internal XMLCh* encoding to the encoding of the service.
345 Parameters:
346 srcData the source buffer to be transcoded
347 srcCount number of characters in the source buffer
348 toFill the destination buffer
349 maxBytes the max number of bytes in the destination buffer
350 charsEaten after transcoding, this will hold the number of chars that were processed from the source buffer
351 options options to pass to the transcoder that explain how to respond to an unrepresentable character
352
353 Returns:
354 Returns the number of chars put into the target buffer
355 unsigned int
356 XMLUTF8Transcoder::transcodeTo( const XMLCh* const srcData
357 , const unsigned int srcCount
358 , XMLByte* const toFill
359 , const unsigned int maxBytes
360 , unsigned int& charsEaten
361 , const UnRepOpts options)
362
363 */
364
365 unsigned int charsEatenFromSource = 0;
366 unsigned int counter = 0;
367 string result;
368 unsigned int charsToRead = XMLString::stringLen(value);
369 do {
370 char resultXMLString_Encoded[ENCODERBUFFERSIZE+4];
371 *resultXMLString_Encoded = 0;
372 charsEatenFromSource = 0;
373 #if XERCES_VERSION_MAJOR >= 3
374 int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
375 XMLString::stringLen(value)-counter,
376 (XMLByte*) resultXMLString_Encoded,
377 (XMLSize_t) ENCODERBUFFERSIZE,
378 (XMLSize_t&) charsEatenFromSource,
379 XMLTranscoder::UnRep_Throw );
380 #else
381 int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
382 XMLString::stringLen(value)-counter,
383 (XMLByte*) resultXMLString_Encoded,
384 ENCODERBUFFERSIZE,
385 charsEatenFromSource,
386 XMLTranscoder::UnRep_Throw );
387 #endif
388 /*
389 log_.info(ME,"TRANSCODE TMP: got '" + result +
390 "' charsToRead= " + lexical_cast<string>(charsToRead) +
391 "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) +
392 " charsEaten=" + lexical_cast<string>(charsEatenFromSource) +
393 " counter=" + lexical_cast<string>(counter) +
394 " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget));
395 */
396 if (charsEatenFromSource < 1)
397 break;
398 result += string(resultXMLString_Encoded, charsPutToTarget);
399 counter += charsEatenFromSource;
400 }
401 while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE);
402
403 //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource));
404
405 if (doTrim) StringTrim::trim(result);
406
407 return result;
408 }
409
410
411 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs)
412 {
413 int len = attrs.getLength();
414 for (int i = 0; i < len; i++) {
415 attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i));
416 }
417 return attrMap;
418 }
419
420 /**
421 * gets the attribute specified by 'name' in the attribute list specified by 'list'. The result is put in
422 * the 'value' argument which is passed by reference. It returns 'true' if the attribute was found in the
423 * specified attribute list or 'false' if it was not. In the later case, the value is untouched by this
424 * method.
425 */
426 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const
427 {
428 const XMLCh* tmp = attrs.getValue(name);
429 if (!tmp) return false;
430
431 char* help1 = NULL;
432 try {
433 help1 = XMLString::transcode(tmp);
434 if (!help1) return false;
435 if (doTrim) {
436 value.assign(StringTrim::trim(help1));
437 }
438 else value.assign(help1);
439 }
440 catch (...) {}
441 Sax2Parser::releaseXMLCh(&help1);
442 return true;
443 }
444
445
446 void Sax2Parser::releaseXMLCh(XMLCh** data)
447 {
448 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
449 XMLString::release(data);
450 #else
451 delete [] *data;
452 *data = 0;
453 #endif
454 }
455
456 void Sax2Parser::releaseXMLCh(char** data)
457 {
458 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
459 XMLString::release(data);
460 #else
461 delete [] *data;
462 *data = 0;
463 #endif
464 }
465
466 #endif // XMLBLASTER_XERCES_PLUGIN
467
468 }}}} // namespace
469 #endif // _UTIL_PARSER_SAX2XERCESPARSER_C
// syntax highlighted by Code2HTML, v. 0.9.1