1 /*-----------------------------------------------------------------------------
2 Name: Sax2Parser.cpp
3 Project: xmlBlaster.org
4 Copyright: xmlBlaster.org, see xmlBlaster-LICENSE file
5 Comment: Default handling of Sax callbacks
6 -----------------------------------------------------------------------------*/
7
8 #ifndef _UTIL_PARSER_SAX2PARSER_C
9 #define _UTIL_PARSER_SAX2PARSER_C
10
11 #if defined(_WIN32)
12 #pragma warning(disable:4786)
13 #endif
14
#include <util/parser/Sax2Parser.h>
#include <xercesc/sax/SAXException.hpp>
#include <xercesc/sax2/SAX2XMLReader.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <util/XmlBlasterException.h>
#include <util/Global.h>
#include <util/lexical_cast.h>
#include <iostream>
#include <vector>
//#include <cstdlib> //<stdlib.h>
26
27 namespace org { namespace xmlBlaster { namespace util { namespace parser {
28
29 using namespace std;
30
/**
 * Block size (16 KiB) handed to the transcoder factory and used as the
 * size of the chunk buffer when converting XMLCh strings to std::string.
 */
static const int ENCODERBUFFERSIZE = 16 * 1024;
32
33 Sax2Parser::Sax2Parser(org::xmlBlaster::util::Global& global, XmlHandlerBase *handler) :
34 I_Parser(handler), ME("Sax2Parser"), global_(global), log_(global.getLog("org.xmlBlaster.util.xml")),
35 xmlBlasterTranscoder_(0)
36 {
37 if (log_.call()) log_.trace(ME, "Creating new Sax2Parser");
38
39 //"UTF-8" is currently not supported with our std::string usage!
40 encoding_ = global_.getProperty().getStringProperty("xmlBlaster/encoding", "iso-8859-1");
41
42 XMLTransService::Codes resCode;
43 xmlBlasterTranscoder_ = XMLPlatformUtils::fgTransService->makeNewTranscoderFor(encoding_.c_str(), resCode, ENCODERBUFFERSIZE);
44 if (resCode != 0/*XMLTransService::Codes::Ok*/) {
45 log_.error(ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with error code " + lexical_cast<string>((int)resCode) +
46 ". Please check your SAX parser setting '-xmlBlaster/encoding'");
47 throw XmlBlasterException(USER_CONFIGURATION, ME, "Creation of XMLTranscoder with encoding='" + encoding_ + "' failed with " + lexical_cast<string>((int)resCode));
48 }
49 else {
50 if (log_.trace()) log_.trace(ME, "Created XMLTranscoder res=" + lexical_cast<string>((int)resCode) + " with encoding=" + encoding_);
51 }
52 }
53
54 Sax2Parser::~Sax2Parser()
55 {
56 delete xmlBlasterTranscoder_;
57 }
58
59 std::string Sax2Parser::usage()
60 {
61 std::string text = string("");
62 //text += string("\n");
63 text += string("\nThe xerces SAX XML parser plugin configuration:");
64 text += string("\n -xmlBlaster/encoding [iso-8859-1]");
65 text += string("\n The parser encoding to use for xmlBlaster specific QoS and key SAX parsing");
66 text += string("\n");
67 return text;
68 }
69
70 void Sax2Parser::init(const string &xmlLiteral)
71 {
72 if (xmlLiteral.size() > 0) {
73 parse(xmlLiteral);
74 }
75 }
76
77 /**
78 * Does the actual parsing
79 * @param xmlData Quality of service in XML notation
80 */
81 void Sax2Parser::parse(const string &xmlData)
82 {
83 //if (log_.call()) log_.call(ME, "parse");
84 //if (log_.trace()) log_.trace(ME, string("parse content:'") + xmlData + string("'"));
85
86 SAX2XMLReader *parser = NULL;
87 //XMLCh* encodingHelper = NULL;
88 try {
89 parser = XMLReaderFactory::createXMLReader();
90 parser->setContentHandler(this);
91 parser->setErrorHandler(this);
92 parser->setLexicalHandler(this);
93
94 // "UTF-8" "iso-8859-1"
95 //string xmlData1 = string("<?xml version=\"1.0\" encoding=\""+encoding_+"\"?>\n") + xmlData;
96 const string &xmlData1 = xmlData;
97 //log_.info(ME, "Parsing now: " + xmlData1);
98
99 MemBufInputSource inSource((const XMLByte*)xmlData1.c_str(), (unsigned int)(xmlData1.size()), "xmlBlaster", false);
100
101 XMLCh tempStr[100];
102 XMLString::transcode(encoding_.c_str(), tempStr, 99);
103 inSource.setEncoding(tempStr);
104 //encodingHelper = XMLString::transcode(encoding.c_str());
105 //inSource.setEncoding(encodingHelper);
106 //Sax2Parser::releaseXMLCh(&encodingHelper);
107
108 parser->parse(inSource);
109 delete parser;
110 }
111 catch (StopParseException&) {
112 // If it does not work, it could be wrapped into SAXParseException
113 log_.error(ME, string("StopParseException: ") +
114 "Parsing execution stopped half the way ");
115 if (log_.trace()) {
116 string help = XmlBlasterException::getStackTrace();
117 log_.plain(ME, help);
118 }
119 delete parser; // just in case it did not
120 return;
121 }
122 catch (XmlBlasterException& ex) {
123 throw ex;
124 }
125 catch (SAXParseException &err) {
126 string loc = getLocationString(err) + string(": ") + getStringValue(err.getMessage());
127 delete parser;
128 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXParseException") + loc);
129 }
130 catch (SAXNotRecognizedException &err) {
131 string msg = getStringValue(err.getMessage());
132 delete parser;
133 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotRecognizedException: ") + msg);
134 }
135 catch (SAXNotSupportedException &err) {
136 string msg = getStringValue(err.getMessage());
137 delete parser;
138 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXNotSupportedException: ") + msg);
139 }
140 catch (const XMLException &err) {
141 string msg = getStringValue(err.getMessage());
142 delete parser;
143 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("XMLException: ") + msg);
144 }
145 catch (SAXException &err) {
146 string msg = getStringValue(err.getMessage());
147 delete parser;
148 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("SAXException: ") + msg);
149 }
150 catch (const std::exception& err) { // catches all of bad_alloc, bad_cast, runtime_error, ...
151 string msg = err.what() + string(": ") + xmlData;
152 delete parser;
153 throw XmlBlasterException(USER_ILLEGALARGUMENT, ME + "::parse", string("std:exception: ") + msg);
154 }
155 catch (const string& err) {
156 string msg = err;
157 delete parser;
158 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("string exception. message:") + err + ": " + xmlData);
159 }
160 catch (const char* err) {
161 string msg = err;
162 delete parser;
163 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("char *exception. message:") + err + ": " + xmlData);
164 }
165 catch (...) {
166 delete parser;
167 throw XmlBlasterException(INTERNAL_UNKNOWN, ME + "::parse", string("Unknown parse exception: ") + xmlData);
168 }
169 }
170
171 /** Receive notification of the end of the document. */
172 void Sax2Parser::endDocument()
173 {
174 if (log_.call()) log_.call(ME, string("endDocument"));
175 handler_->endDocument();
176 }
177
178 /** Receive notification of the end of an element. */
179 void Sax2Parser::endElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname)
180 {
181 //if (log_.call()) log_.call(ME, string("endElement"));
182 handler_->endElement(getStringValue(qname));
183 }
184
185 /** Receive notification of the beginning of the document. */
186 void Sax2Parser::startDocument()
187 {
188 //if (log_.call()) log_.call(ME, string("startDocument"));
189 handler_->startDocument();
190 }
191
192 /** Receive notification of the start of an element. */
193 void Sax2Parser::startElement(const XMLCh *const /*uri*/, const XMLCh *const /*localname*/, const XMLCh *const qname, const Attributes &attrs)
194 {
195 //if (log_.call()) log_.call(ME, "startElement <" + name + ">");
196 AttributeMap tmpMap;
197 handler_->startElement(getStringValue(qname), getAttributeMap(tmpMap, attrs));
198 }
199
200 /** Receive notification of the end of a CDATA section. */
201 void Sax2Parser::endCDATA()
202 {
203 //if (log_.call()) log_.call(ME, string("endCDATA"));
204 handler_->endCDATA();
205 }
206
207 /** Receive notification of the start of a CDATA section. */
208 void Sax2Parser::startCDATA()
209 {
210 //if (log_.call()) log_.call(ME, string("startCDATA"));
211 handler_->startCDATA();
212 }
213
214 /** Receive notification of character data inside an element. */
215 void Sax2Parser::characters(const XMLCh *const chars, const unsigned int length)
216 {
217 //if (log_.call()) log_.call(ME, string("characters"));
218 string tmp;
219 bool doTrim = false;
220 tmp.assign(getStringValue(chars, doTrim), 0, length);
221 handler_->characters(tmp);
222 }
223
224 //
225 // ErrorHandler methods
226 //
227
228 /** Warning. */
229 void Sax2Parser::warning(const SAXParseException &ex)
230 {
231 if (log_.call()) log_.call(ME, string("warning"));
232 string txt = getLocationString(ex) + "\n";
233 handler_->warning(txt);
234 }
235
236
237 /** Error. */
238 void Sax2Parser::error(const SAXParseException &ex)
239 {
240 if (log_.call()) log_.call(ME, string("error"));
241 string txt = getLocationString(ex) + "\n";
242 handler_->error(txt);
243 }
244
245
246 /** Fatal error. */
247 void Sax2Parser::fatalError(const SAXParseException &ex)
248 {
249 if (log_.call()) log_.call(ME, string("fatalError"));
250 string txt = getLocationString(ex) + "\n";
251 handler_->fatalError(txt);
252 }
253
254
255 /** Returns a string of the location. */
256 string Sax2Parser::getLocationString(const SAXParseException &ex)
257 {
258 string systemId = getStringValue(ex.getSystemId());
259 string str;
260 if (systemId != "") {
261 string::size_type index = systemId.find_last_of('/');
262 if (index != string::npos) systemId = systemId.substr(index + 1);
263 str = systemId + ":";
264 }
265 string message = Sax2Parser::getStringValue(ex.getMessage(), true);
266 return str + "line=" + lexical_cast<std::string>(ex.getLineNumber())
267 + "/col=" + lexical_cast<std::string>(ex.getColumnNumber()) + " " + message;
268 }
269
270
271 /**
272 * Compares two strings (where name1 is a Unicode3.0 string!!) for
273 * unsensitive case compare. It returns true if the content of the
274 * strings is equal (no matter what the case is). Using this method to
275 * compare the strings should be portable to all platforms supported by
276 * xerces.
277 */
278 bool Sax2Parser::caseCompare(const XMLCh *name1, const char *name2)
279 {
280 XMLCh* name1Helper = XMLString::replicate(name1);
281 XMLString::upperCase(name1Helper);
282 XMLCh* name2Helper = XMLString::transcode(name2);
283 XMLString::upperCase(name2Helper);
284 bool ret = (XMLString::compareIString(name1Helper, name2Helper) == 0);
285 Sax2Parser::releaseXMLCh(&name1Helper);
286 Sax2Parser::releaseXMLCh(&name2Helper);
287 return ret;
288 }
289
290
291 /**
292 * returns a trimmed value (usually from an attribute) as a string
293 */
294 string Sax2Parser::getStringValue(const XMLCh* const value, bool doTrim) const
295 {
296 /* Works only with US-ASCII:
297 char* help = 0;
298 try {
299 string ret;
300 help = XMLString::transcode(value);
301 if (help != 0) {
302 if (doTrim) ret = StringTrim::trim(help);
303 else ret = string(help);
304 Sax2Parser::releaseXMLCh(&help);
305 }
306 }
307 catch (...) {
308 if (help != 0)
309 Sax2Parser::releaseXMLCh(&help);
310 cerr << "Caught exception in getStringValue(XMLCh=" << value << ")" << endl;
311 // throw;
312 }
313 */
314 if (value == NULL) {
315 return "";
316 }
317
318 /*
319 Converts from the encoding of the service to the internal XMLCh* encoding.
320 unsigned int
321 XMLUTF8Transcoder::transcodeFrom(const XMLByte* const srcData
322 , const unsigned int srcCount
323 , XMLCh* const toFill
324 , const unsigned int maxChars
325 , unsigned int& bytesEaten
326 , unsigned char* const charSizes)
327 */
328 /*
329 Converts from the internal XMLCh* encoding to the encoding of the service.
330 Parameters:
331 srcData the source buffer to be transcoded
332 srcCount number of characters in the source buffer
333 toFill the destination buffer
334 maxBytes the max number of bytes in the destination buffer
335 charsEaten after transcoding, this will hold the number of chars that were processed from the source buffer
336 options options to pass to the transcoder that explain how to respond to an unrepresentable character
337
338 Returns:
339 Returns the number of chars put into the target buffer
340 unsigned int
341 XMLUTF8Transcoder::transcodeTo( const XMLCh* const srcData
342 , const unsigned int srcCount
343 , XMLByte* const toFill
344 , const unsigned int maxBytes
345 , unsigned int& charsEaten
346 , const UnRepOpts options)
347
348 */
349
350 unsigned int charsEatenFromSource = 0;
351 unsigned int counter = 0;
352 string result;
353 unsigned int charsToRead = XMLString::stringLen(value);
354 do {
355 char resultXMLString_Encoded[ENCODERBUFFERSIZE+4];
356 *resultXMLString_Encoded = 0;
357 charsEatenFromSource = 0;
358 int charsPutToTarget = xmlBlasterTranscoder_->transcodeTo(value+counter,
359 XMLString::stringLen(value)-counter,
360 (XMLByte*) resultXMLString_Encoded,
361 ENCODERBUFFERSIZE,
362 charsEatenFromSource,
363 XMLTranscoder::UnRep_Throw );
364
365 /*
366 log_.info(ME,"TRANSCODE TMP: got '" + result +
367 "' charsToRead= " + lexical_cast<string>(charsToRead) +
368 "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) +
369 " charsEaten=" + lexical_cast<string>(charsEatenFromSource) +
370 " counter=" + lexical_cast<string>(counter) +
371 " charsPutToTarget=" + lexical_cast<string>(charsPutToTarget));
372 */
373 if (charsEatenFromSource < 1)
374 break;
375 result += string(resultXMLString_Encoded, charsPutToTarget);
376 counter += charsEatenFromSource;
377 }
378 while(charsEatenFromSource < charsToRead); //charsEatenFromSource== ENCODERBUFFERSIZE || charsPutToTarget == ENCODERBUFFERSIZE);
379
380 //log_.info(ME,"TRANSCODE DONE: got '" + result + "' ENCODERBUFFERSIZE= " + lexical_cast<string>(ENCODERBUFFERSIZE) + " charsEaten=" + lexical_cast<string>(charsEatenFromSource));
381
382 if (doTrim) StringTrim::trim(result);
383
384 return result;
385 }
386
387
388 AttributeMap& Sax2Parser::getAttributeMap(AttributeMap& attrMap, const Attributes &attrs)
389 {
390 int len = attrs.getLength();
391 for (int i = 0; i < len; i++) {
392 attrMap[getStringValue(attrs.getQName(i))] = getStringValue(attrs.getValue(i));
393 }
394 return attrMap;
395 }
396
397 /**
398 * gets the attribute specified by 'name' in the attribute list specified by 'list'. The result is put in
399 * the 'value' argument which is passed by reference. It returns 'true' if the attribute was found in the
400 * specified attribute list or 'false' if it was not. In the later case, the value is untouched by this
401 * method.
402 */
403 bool Sax2Parser::getStringAttr(const Attributes& attrs, const XMLCh* const name, string& value, bool doTrim) const
404 {
405 const XMLCh* tmp = attrs.getValue(name);
406 if (!tmp) return false;
407
408 char* help1 = NULL;
409 try {
410 help1 = XMLString::transcode(tmp);
411 if (!help1) return false;
412 if (doTrim) {
413 value.assign(StringTrim::trim(help1));
414 }
415 else value.assign(help1);
416 }
417 catch (...) {}
418 Sax2Parser::releaseXMLCh(&help1);
419 return true;
420 }
421
422
423 void Sax2Parser::releaseXMLCh(XMLCh** data)
424 {
425 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
426 XMLString::release(data);
427 #else
428 delete [] *data;
429 *data = 0;
430 #endif
431 }
432
433 void Sax2Parser::releaseXMLCh(char** data)
434 {
435 #if XERCES_VERSION_MAJOR > 1 && XERCES_VERSION_MINOR > 1
436 XMLString::release(data);
437 #else
438 delete [] *data;
439 *data = 0;
440 #endif
441 }
442
443 #endif
444
445 }}}} // namespace
// syntax highlighted by Code2HTML, v. 0.9.1