sot-talos-balance  2.0.5
Collection of dynamic-graph entities aimed at implementing balance control on talos.
Markup.cpp
Go to the documentation of this file.
1 #define _CRT_SECURE_NO_WARNINGS
2 #define NOMINMAX
3 
4 // Markup.cpp: implementation of the NBC_CMarkup class.
5 //
6 // NBC_CMarkup Release 6.5 Lite
7 // Copyright (C) 1999-2003 First Objective Software, Inc. All rights reserved
8 // This entire notice must be retained in this source code
9 // Redistributing this source code requires written permission
10 // This software is provided "as is", with no warranty.
11 // Latest fixes enhancements and documentation at www.firstobject.com
12 
14 
15 #include <assert.h>
16 #include <stdarg.h>
17 #include <string.h>
18 
19 #include <algorithm>
20 
21 #ifdef _DEBUG
22 #undef THIS_FILE
23 static char THIS_FILE[] = __FILE__;
24 #define new DEBUG_NEW
25 #endif
26 
27 void CMarkup::SetIndent(int nIndent) { mnIndent = nIndent; }
28 
29 void CMarkup::operator=(const CMarkup& markup) {
30  m_iPosParent = markup.m_iPosParent;
31  m_iPos = markup.m_iPos;
32  m_iPosChild = markup.m_iPosChild;
33  m_iPosFree = markup.m_iPosFree;
34  m_nNodeType = markup.m_nNodeType;
35  m_aPos.clear();
36  m_aPos = markup.m_aPos;
37  m_csDoc = markup.m_csDoc;
39 }
40 
41 bool CMarkup::SetDoc(const char* szDoc) {
42  // Reset indexes
43  m_iPosFree = 1;
44  ResetPos();
45 
46  // Set document text
47  if (szDoc)
48  m_csDoc = szDoc;
49  else
50  m_csDoc.erase();
51 
52  // Starting size of position array: 1 element per 64 bytes of document
53  // Tight fit when parsing small doc, only 0 to 2 reallocs when parsing large
54  // doc Start at 8 when creating new document
55  std::string::size_type nStartSize = m_csDoc.length() / 64 + 8;
56  if (m_aPos.size() < nStartSize) m_aPos.resize(nStartSize);
57 
58  // Parse document
59  bool bWellFormed = false;
60  if (m_csDoc.length()) {
61  m_aPos[0].Clear();
62  int iPos = x_ParseElem(0);
63  if (iPos > 0) {
64  m_aPos[0].iElemChild = iPos;
65  bWellFormed = true;
66  }
67  }
68 
69  // Clear indexes if parse failed or empty document
70  if (!bWellFormed) {
71  m_aPos[0].Clear();
72  m_iPosFree = 1;
73  }
74 
75  ResetPos();
76 
77  memset(mtIndent, ' ', sizeof(mtIndent));
78  mtIndent[999] = 0;
79  return bWellFormed;
80 }
81 
83  if (!(m_aPos.empty()) && m_aPos[0].iElemChild) return true;
84  return false;
85 }
86 
87 bool CMarkup::FindElem(const char* szName) {
88  // Change current position only if found
89  //
90  if (!m_aPos.empty()) {
91  int iPos = x_FindElem(m_iPosParent, m_iPos, szName);
92  if (iPos) {
93  // Assign new position
94  x_SetPos(m_aPos[iPos].iElemParent, iPos, 0);
95  return true;
96  }
97  }
98  return false;
99 }
100 
101 bool CMarkup::FindChildElem(const char* szName) {
102  // Change current child position only if found
103  //
104  // Shorthand: call this with no current main position
105  // means find child under root element
106  if (!m_iPos) FindElem();
107 
108  int iPosChild = x_FindElem(m_iPos, m_iPosChild, szName);
109  if (iPosChild) {
110  // Assign new position
111  int iPos = m_aPos[iPosChild].iElemParent;
112  x_SetPos(m_aPos[iPos].iElemParent, iPos, iPosChild);
113  return true;
114  }
115 
116  return false;
117 }
118 
119 std::string CMarkup::GetTagName() const {
120  // Return the tag name at the current main position
121  std::string csTagName;
122 
123  if (m_iPos) csTagName = x_GetTagName(m_iPos);
124  return csTagName;
125 }
126 
128  // If there is no child position and IntoElem is called it will succeed in
129  // release 6.3 (A subsequent call to FindElem will find the first element) The
130  // following short-hand behavior was never part of EDOM and was misleading It
131  // would find a child element if there was no current child element position
132  // and go into it It is removed in release 6.3, this change is NOT backwards
133  // compatible! if (! m_iPosChild)
134  // FindChildElem();
135 
136  if (m_iPos && m_nNodeType == MNT_ELEMENT) {
138  return true;
139  }
140  return false;
141 }
142 
144  // Go to parent element
145  if (m_iPosParent) {
147  return true;
148  }
149  return false;
150 }
151 
153 // Private Methods
155 
157  //
158  // This returns the index of the next unused ElemPos in the array
159  //
160  if (m_iPosFree == (int)m_aPos.size())
161  m_aPos.resize(m_iPosFree + m_iPosFree / 2);
162  ++m_iPosFree;
163  return m_iPosFree - 1;
164 }
165 
167  //
168  // This decrements the index of the next unused ElemPos in the array
169  // allowing the element index returned by GetFreePos() to be reused
170  //
171  --m_iPosFree;
172  return 0;
173 }
174 
175 int CMarkup::x_ParseError(const char* szError, const char* szName) {
176  if (szName)
177  m_csError = Format(szError, szName);
178  else
179  m_csError = szError;
180  x_ReleasePos();
181  return -1;
182 }
183 
// Parses the next element under iPosParent, including all nested elements.
//
// iPosParent: index into m_aPos of the enclosing element; its nEndL field is
//             the document offset where scanning starts and is advanced as
//             tags are consumed.
//
// Returns the m_aPos index of the parsed element (> 0), 0 when the next tag
// is an end tag (x_ReleasePos returns 0), or -1 on a parse error
// (x_ParseError returns -1 after storing the message in m_csError).
184 int CMarkup::x_ParseElem(int iPosParent) {
185  // This is either called by SetDoc, x_AddSubDoc, or itself recursively
186  // m_aPos[iPosParent].nEndL is where to start parsing for the child element
187  // This returns the new position if a tag is found, otherwise zero
188  // In all cases we need to get a new ElemPos, but release it if unused
189  //
190  int iPos = x_GetFreePos();
191  m_aPos[iPos].nStartL = m_aPos[iPosParent].nEndL;
192  m_aPos[iPos].iElemParent = iPosParent;
193  m_aPos[iPos].iElemChild = 0;
194  m_aPos[iPos].iElemNext = 0;
195 
196  // Start Tag
197  // A loop is used to ignore all remarks tags and special tags
198  // i.e. <?xml version="1.0"?>, and <!-- comment here -->
199  // So any tag beginning with ? or ! is ignored
200  // Loop past ignored tags
201  TokenPos token(m_csDoc.c_str());
202  token.nNext = m_aPos[iPosParent].nEndL;
203  std::string csName;
 // csName stays empty until a real element tag has been read.
204  while (csName.empty()) {
205  // Look for left angle bracket of start tag
206  m_aPos[iPos].nStartL = token.nNext;
207  if (!x_FindChar(token.szDoc, m_aPos[iPos].nStartL, '<'))
208  return x_ParseError("Element tag not found");
209 
210  // Set parent's End tag to start looking from here (or later)
211  m_aPos[iPosParent].nEndL = m_aPos[iPos].nStartL;
212 
213  // Determine whether this is an element, or bypass other type of node
214  token.nNext = m_aPos[iPos].nStartL + 1;
215  if (x_FindToken(token)) {
216  if (token.bIsString) return x_ParseError("Tag starts with quote");
217  char cFirstChar = m_csDoc[token.nL];
218  if (cFirstChar == '?' || cFirstChar == '!') {
 // Rewind to the '<' so x_ParseNode sees the whole node and can skip it.
219  token.nNext = m_aPos[iPos].nStartL;
220  if (!x_ParseNode(token)) return x_ParseError("Invalid node");
221  } else if (cFirstChar != '/') {
222  csName = x_GetToken(token);
223  // Look for end of tag
224  if (!x_FindChar(token.szDoc, token.nNext, '>'))
225  return x_ParseError("End of tag not found");
226  } else
227  return x_ReleasePos(); // probably end tag of parent
228  } else
229  return x_ParseError("Abrupt end within tag");
230  }
231  m_aPos[iPos].nStartR = token.nNext;
232 
233  // Is ending mark within start tag, i.e. empty element?
234  if (m_csDoc[m_aPos[iPos].nStartR - 1] == '/') {
235  // Empty element
236  // Close tag left is set to ending mark, and right to open tag right
237  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1;
238  m_aPos[iPos].nEndR = m_aPos[iPos].nStartR;
239  } else // look for end tag
240  {
241  // Element probably has contents
242  // Determine where to start looking for left angle bracket of end tag
243  // This is done by recursively parsing the contents of this element
244  int iInner, iInnerPrev = 0;
245  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + 1;
 // The recursive call obtains slots through x_GetFreePos, which may resize
 // m_aPos; entries are therefore always addressed by index.
246  while ((iInner = x_ParseElem(iPos)) > 0) {
247  // Set links to iInner
248  if (iInnerPrev)
249  m_aPos[iInnerPrev].iElemNext = iInner;
250  else
251  m_aPos[iPos].iElemChild = iInner;
252  iInnerPrev = iInner;
253 
254  // Set offset to reflect child
255  m_aPos[iPos].nEndL = m_aPos[iInner].nEndR + 1;
256  }
 // A nested parse error is passed straight up; 0 just means no more children.
257  if (iInner == -1) return -1;
258 
259  // Look for left angle bracket of end tag
260  if (!x_FindChar(token.szDoc, m_aPos[iPos].nEndL, '<'))
261  return x_ParseError("End tag of %s element not found", csName.c_str());
262 
263  // Look through tokens of end tag
264  token.nNext = m_aPos[iPos].nEndL + 1;
265  int nTokenCount = 0;
266  while (x_FindToken(token)) {
267  ++nTokenCount;
268  if (!token.bIsString) {
269  // Is first token not an end slash mark?
270  if (nTokenCount == 1 && m_csDoc[token.nL] != '/')
271  return x_ParseError("Expecting end tag of element %s",
272  csName.c_str());
273 
274  else if (nTokenCount == 2 && !token.Match(csName.c_str()))
275  return x_ParseError("End tag does not correspond to %s",
276  csName.c_str());
277 
278  // Else is it a right angle bracket?
279  else if (m_csDoc[token.nL] == '>')
280  break;
281  }
282  }
283 
284  // Was a right angle bracket not found?
285  if (!token.szDoc[token.nL] || nTokenCount < 2)
286  return x_ParseError("End tag not completed for element %s",
287  csName.c_str());
288  m_aPos[iPos].nEndR = token.nL;
289  }
290 
291  // Successfully parsed element (and contained elements)
292  return iPos;
293 }
294 
295 bool CMarkup::x_FindChar(const char* szDoc, int& nChar, char c) {
296  // static function
297  const char* pChar = &szDoc[nChar];
298  while (*pChar && *pChar != c) pChar += 1; //_tclen(pChar);
299  nChar = (int)(pChar - szDoc);
300  if (!*pChar) return false;
301  /*
302  while (szDoc[nChar] && szDoc[nChar] != c)
303  nChar += _tclen(&szDoc[nChar]);
304  if (! szDoc[nChar])
305  return false;
306  */
307  return true;
308 }
309 
310 bool CMarkup::x_FindAny(const char* szDoc, int& nChar) {
311  // Starting at nChar, find a non-whitespace char
312  // return false if no non-whitespace before end of document, nChar points to
313  // end otherwise return true and nChar points to non-whitespace char
314  while (szDoc[nChar] && strchr(" \t\n\r", szDoc[nChar])) ++nChar;
315  return szDoc[nChar] != '\0';
316 }
317 
319  // Starting at token.nNext, bypass whitespace and find the next token
320  // returns true on success, members of token point to token
321  // returns false on end of document, members point to end of document
322  const char* szDoc = token.szDoc;
323  int nChar = token.nNext;
324  token.bIsString = false;
325 
326  // By-pass leading whitespace
327  if (!x_FindAny(szDoc, nChar)) {
328  // No token was found before end of document
329  token.nL = nChar;
330  token.nR = nChar;
331  token.nNext = nChar;
332  return false;
333  }
334 
335  // Is it an opening quote?
336  char cFirstChar = szDoc[nChar];
337  if (cFirstChar == '\"' || cFirstChar == '\'') {
338  token.bIsString = true;
339 
340  // Move past opening quote
341  ++nChar;
342  token.nL = nChar;
343 
344  // Look for closing quote
345  x_FindChar(token.szDoc, nChar, cFirstChar);
346 
347  // Set right to before closing quote
348  token.nR = nChar - 1;
349 
350  // Set nChar past closing quote unless at end of document
351  if (szDoc[nChar]) ++nChar;
352  } else {
353  // Go until special char or whitespace
354  token.nL = nChar;
355  while (szDoc[nChar] && !strchr(" \t\n\r<>=\\/?!", szDoc[nChar]))
356  nChar += 1; //_tclen(&szDoc[nChar]);
357 
358  // Adjust end position if it is one special char
359  if (nChar == token.nL) ++nChar; // it is a special char
360  token.nR = nChar - 1;
361  }
362 
363  // nNext points to one past last char of token
364  token.nNext = nChar;
365  return true;
366 }
367 
368 std::string CMarkup::x_GetToken(const CMarkup::TokenPos& token) const {
369  // The token contains indexes into the document identifying a small substring
370  // Build the substring from those indexes and return it
371  if (token.nL > token.nR) return "";
372  return Mid(
373  m_csDoc, token.nL,
374  token.nR - token.nL + ((token.nR < (int)(m_csDoc.length())) ? 1 : 0));
375 }
376 
377 int CMarkup::x_FindElem(int iPosParent, int iPos, const char* szPath) {
378  // If szPath is NULL or empty, go to next sibling element
379  // Otherwise go to next sibling element with matching path
380  //
381  if (iPos)
382  iPos = m_aPos[iPos].iElemNext;
383  else
384  iPos = m_aPos[iPosParent].iElemChild;
385 
386  // Finished here if szPath not specified
387  if (szPath == NULL || !szPath[0]) return iPos;
388 
389  // Search
390  TokenPos token(m_csDoc.c_str());
391  while (iPos) {
392  // Compare tag name
393  token.nNext = m_aPos[iPos].nStartL + 1;
394  x_FindToken(token); // Locate tag name
395  if (token.Match(szPath)) return iPos;
396  iPos = m_aPos[iPos].iElemNext;
397  }
398  return 0;
399 }
400 
402  // Call this with token.nNext set to the start of the node
403  // This returns the node type and token.nNext set to the char after the node
404  // If the node is not found or an element, token.nR is not determined
405  int nTypeFound = 0;
406  const char* szDoc = token.szDoc;
407  token.nL = token.nNext;
408  if (szDoc[token.nL] == '<') {
409  // Started with <, could be:
410  // <!--...--> comment
411  // <!DOCTYPE ...> dtd
412  // <?target ...?> processing instruction
413  // <![CDATA[...]]> cdata section
414  // <NAME ...> element
415  //
416  if (!szDoc[token.nL + 1] || !szDoc[token.nL + 2]) return 0;
417  char cFirstChar = szDoc[token.nL + 1];
418  const char* szEndOfNode = NULL;
419  if (cFirstChar == '?') {
420  nTypeFound = MNT_PROCESSING_INSTRUCTION;
421  szEndOfNode = "?>";
422  } else if (cFirstChar == '!') {
423  char cSecondChar = szDoc[token.nL + 2];
424  if (cSecondChar == '[') {
425  nTypeFound = MNT_CDATA_SECTION;
426  szEndOfNode = "]]>";
427  } else if (cSecondChar == '-') {
428  nTypeFound = MNT_COMMENT;
429  szEndOfNode = "-->";
430  } else {
431  // Document type requires tokenizing because of strings and brackets
432  nTypeFound = 0;
433  int nBrackets = 0;
434  while (x_FindToken(token)) {
435  if (!token.bIsString) {
436  char cChar = szDoc[token.nL];
437  if (cChar == '[')
438  ++nBrackets;
439  else if (cChar == ']')
440  --nBrackets;
441  else if (nBrackets == 0 && cChar == '>') {
442  nTypeFound = MNT_DOCUMENT_TYPE;
443  break;
444  }
445  }
446  }
447  if (!nTypeFound) return 0;
448  }
449  } else if (cFirstChar == '/') {
450  // End tag means no node found within parent element
451  return 0;
452  } else {
453  nTypeFound = MNT_ELEMENT;
454  }
455 
456  // Search for end of node if not found yet
457  if (szEndOfNode) {
458  const char* pEnd = strstr(&szDoc[token.nNext], szEndOfNode);
459  if (!pEnd) return 0; // not well-formed
460  token.nNext = (int)(pEnd - szDoc) + (int)strlen(szEndOfNode);
461  }
462  } else if (szDoc[token.nL]) {
463  // It is text or whitespace because it did not start with <
464  nTypeFound = MNT_WHITESPACE;
465  token.nNext = token.nL;
466  if (x_FindAny(szDoc, token.nNext)) {
467  if (szDoc[token.nNext] != '<') {
468  nTypeFound = MNT_TEXT;
469  x_FindChar(szDoc, token.nNext, '<');
470  }
471  }
472  }
473  return nTypeFound;
474 }
475 
476 std::string CMarkup::x_GetTagName(int iPos) const {
477  // Return the tag name at specified element
478  TokenPos token(m_csDoc.c_str());
479  token.nNext = m_aPos[iPos].nStartL + 1;
480  if (!iPos || !x_FindToken(token)) return "";
481 
482  // Return substring of document
483  return x_GetToken(token);
484 }
485 
487  const char* szAttrib) const {
488  // If szAttrib is NULL find next attrib, otherwise find named attrib
489  // Return true if found
490  int nAttrib = 0;
491  for (int nCount = 0; x_FindToken(token); ++nCount) {
492  if (!token.bIsString) {
493  // Is it the right angle bracket?
494  char cChar = m_csDoc[token.nL];
495  if (cChar == '>' || cChar == '/' || cChar == '?')
496  break; // attrib not found
497 
498  // Equal sign
499  if (cChar == '=') continue;
500 
501  // Potential attribute
502  if (!nAttrib && nCount) {
503  // Attribute name search?
504  if (!szAttrib || !szAttrib[0])
505  return true; // return with token at attrib name
506 
507  // Compare szAttrib
508  if (token.Match(szAttrib)) nAttrib = nCount;
509  }
510  } else if (nAttrib && nCount == nAttrib + 2) {
511  return true;
512  }
513  }
514 
515  // Not found
516  return false;
517 }
518 
519 std::string CMarkup::x_GetAttrib(int iPos, const char* szAttrib) const {
520  // Return the value of the attrib
521  TokenPos token(m_csDoc.c_str());
522  if (iPos && m_nNodeType == MNT_ELEMENT)
523  token.nNext = m_aPos[iPos].nStartL + 1;
524  else
525  return "";
526 
527  if (szAttrib && x_FindAttrib(token, szAttrib))
528  return x_TextFromDoc(
529  token.nL, token.nR - ((token.nR < (int)(m_csDoc.length())) ? 0 : 1));
530  return "";
531 }
532 
533 bool CMarkup::x_SetAttrib(int iPos, const char* szAttrib, const char* szValue) {
534  // Set attribute in iPos element
535  TokenPos token(m_csDoc.c_str());
536  int nInsertAt;
537  if (iPos && m_nNodeType == MNT_ELEMENT) {
538  token.nNext = m_aPos[iPos].nStartL + 1;
539  nInsertAt = m_aPos[iPos].nStartR - (m_aPos[iPos].IsEmptyElement() ? 1 : 0);
540  } else
541  return false;
542 
543  // Create insertion text depending on whether attribute already exists
544  int nReplace = 0;
545  std::string csInsert;
546  if (x_FindAttrib(token, szAttrib)) {
547  // Replace value only
548  // Decision: for empty value leaving attrib="" instead of removing attrib
549  csInsert = x_TextToDoc(szValue, true);
550  nInsertAt = token.nL;
551  nReplace = token.nR - token.nL + 1;
552  } else {
553  // Insert string name value pair
554  std::string csFormat;
555  csFormat = " ";
556  csFormat += szAttrib;
557  csFormat += "=\"";
558  csFormat += x_TextToDoc(szValue, true);
559  csFormat += "\"";
560  csInsert = csFormat;
561  }
562 
563  x_DocChange(nInsertAt, nReplace, csInsert);
564  int nAdjust = (int)csInsert.length() - nReplace;
565  m_aPos[iPos].nStartR += nAdjust;
566  m_aPos[iPos].AdjustEnd(nAdjust);
567  x_Adjust(iPos, nAdjust);
569  return true;
570 }
571 
572 std::string CMarkup::x_GetData(int iPos) const {
573  // Return a string representing data between start and end tag
574  // Return empty string if there are any children elements
575  if (!m_aPos[iPos].iElemChild && !m_aPos[iPos].IsEmptyElement()) {
576  // See if it is a CDATA section
577  const char* szDoc = (const char*)(m_csDoc.c_str());
578  int nChar = m_aPos[iPos].nStartR + 1;
579  if (x_FindAny(szDoc, nChar) && szDoc[nChar] == '<' &&
580  nChar + 11 < m_aPos[iPos].nEndL &&
581  strncmp(&szDoc[nChar], "<![CDATA[", 9) == 0) {
582  nChar += 9;
583  int nEndCDATA = (int)m_csDoc.find("]]>", nChar);
584  if (nEndCDATA != -1 && nEndCDATA < m_aPos[iPos].nEndL) {
585  return Mid(m_csDoc, nChar, nEndCDATA - nChar);
586  }
587  }
588  return x_TextFromDoc(m_aPos[iPos].nStartR + 1, m_aPos[iPos].nEndL - 1);
589  }
590  return "";
591 }
592 
593 std::string CMarkup::x_TextToDoc(const char* szText, bool bAttrib) const {
594  // Convert text as seen outside XML document to XML friendly
595  // replacing special characters with ampersand escape codes
596  // E.g. convert "6>7" to "6&gt;7"
597  //
598  // &lt; less than
599  // &amp; ampersand
600  // &gt; greater than
601  //
602  // and for attributes:
603  //
604  // &apos; apostrophe or single quote
605  // &quot; double quote
606  //
607  static const char* szaReplace[] = {"&lt;", "&amp;", "&gt;", "&apos;",
608  "&quot;"};
609  const char* pFind = bAttrib ? "<&>\'\"" : "<&>";
610  std::string csText;
611  const char* pSource = szText;
612  int nDestSize = (int)strlen(pSource);
613  nDestSize += nDestSize / 10 + 7;
614  char* pDest = GetBuffer(csText, nDestSize);
615  int nLen = 0;
616  char cSource = *pSource;
617  const char* pFound;
618  while (cSource) {
619  if (nLen > nDestSize - 6) {
620  ReleaseBuffer(csText, nLen);
621  nDestSize *= 2;
622  pDest = GetBuffer(csText, nDestSize);
623  }
624  if ((pFound = strchr(pFind, cSource)) != NULL) {
625  pFound = szaReplace[pFound - pFind];
626 #ifdef _WIN32
627  strcpy_s(&pDest[nLen], nDestSize, pFound);
628 #else
629  strncpy(&pDest[nLen], pFound, nDestSize);
630 #endif
631  nLen += (int)strlen(pFound);
632  } else {
633  pDest[nLen] = *pSource;
634  nLen += 1; //_tclen(pSource);
635  }
636  pSource += 1; //_tclen(pSource);
637  cSource = *pSource;
638  }
639  ReleaseBuffer(csText, nLen);
640  return csText;
641 }
642 
643 std::string CMarkup::x_TextFromDoc(int nLeft, int nRight) const {
644  // Convert XML friendly text to text as seen outside XML document
645  // ampersand escape codes replaced with special characters e.g. convert
646  // "6&gt;7" to "6>7" Conveniently the result is always the same or shorter in
647  // byte length
648  //
649  static const char* szaCode[] = {"lt;", "amp;", "gt;", "apos;", "quot;"};
650  static int anCodeLen[] = {3, 4, 3, 5, 5};
651  static const char* szSymbol = "<&>\'\"";
652  std::string csText;
653  const char* pSource = m_csDoc.c_str();
654  int nDestSize = nRight - nLeft + 1;
655  char* pDest = GetBuffer(csText, nDestSize);
656  int nLen = 0;
657  int nCharLen;
658  int nChar = nLeft;
659  while (nChar <= nRight) {
660  if (pSource[nChar] == '&') {
661  // Look for matching &code;
662  bool bCodeConverted = false;
663  for (int nMatch = 0; nMatch < 5; ++nMatch) {
664  if (nChar <= nRight - anCodeLen[nMatch] &&
665  strncmp(szaCode[nMatch], &pSource[nChar + 1], anCodeLen[nMatch]) ==
666  0) {
667  // Insert symbol and increment index past ampersand semi-colon
668  pDest[nLen++] = szSymbol[nMatch];
669  nChar += anCodeLen[nMatch] + 1;
670  bCodeConverted = true;
671  break;
672  }
673  }
674 
675  // If the code is not converted, leave it as is
676  if (!bCodeConverted) {
677  pDest[nLen++] = '&';
678  ++nChar;
679  }
680  } else // not &
681  {
682  nCharLen = 1; //_tclen(&pSource[nChar]);
683  pDest[nLen] = pSource[nChar];
684  nLen += nCharLen;
685  nChar += nCharLen;
686  }
687  }
688  ReleaseBuffer(csText, nLen);
689  return csText;
690 }
691 
692 void CMarkup::x_DocChange(int nLeft, int nReplace,
693  const std::string& csInsert) {
694  // Insert csInsert int m_csDoc at nLeft replacing nReplace chars
695  // Do this with only one buffer reallocation if it grows
696  //
697  int nDocLength = (int)m_csDoc.length();
698  int nInsLength = (int)csInsert.length();
699 
700  // Make sure nLeft and nReplace are within bounds
701  nLeft = std::max(0, std::min(nLeft, nDocLength));
702  nReplace = std::max(0, std::min(nReplace, nDocLength - nLeft));
703 
704  // Get pointer to buffer with enough room
705  int nNewLength = nInsLength + nDocLength - nReplace;
706  int nBufferLen = nNewLength;
707  char* pDoc = GetBuffer(m_csDoc, nBufferLen);
708 
709  // Move part of old doc that goes after insert
710  if (nLeft + nReplace < nDocLength)
711  memmove(&pDoc[nLeft + nInsLength], &pDoc[nLeft + nReplace],
712  (nDocLength - nLeft - nReplace) * sizeof(char));
713 
714  // Copy insert
715  memcpy(&pDoc[nLeft], csInsert.c_str(), nInsLength * sizeof(char));
716 
717  // Release
718  ReleaseBuffer(m_csDoc, nNewLength);
719 }
720 
// Shifts the stored document offsets of the elements that follow a change
// made at element iPos.
//
// iPos:      element at which the document text changed.
// nShift:    amount passed to AdjustStart/AdjustEnd for each visited element.
// bAfterPos: when true the children of iPos are not visited.
721 void CMarkup::x_Adjust(int iPos, int nShift, bool bAfterPos) {
722  // Loop through affected elements and adjust indexes
723  // Algorithm:
724  // 1. update children unless bAfterPos
725  // (if no children or bAfterPos is true, end tag of iPos not affected)
726  // 2. update next siblings and their children
727  // 3. go up until there is a next sibling of a parent and update end tags
728  // 4. step 2
729  int iPosTop = m_aPos[iPos].iElemParent;
730  bool bPosFirst = bAfterPos; // mark as first to skip its children
731  while (iPos) {
732  // Were we at containing parent of affected position?
733  bool bPosTop = false;
734  if (iPos == iPosTop) {
735  // Move iPosTop up one towards root
736  iPosTop = m_aPos[iPos].iElemParent;
737  bPosTop = true;
738  }
739 
740  // Traverse to the next update position
741  if (!bPosTop && !bPosFirst && m_aPos[iPos].iElemChild) {
742  // Depth first
743  iPos = m_aPos[iPos].iElemChild;
744  } else if (m_aPos[iPos].iElemNext) {
745  iPos = m_aPos[iPos].iElemNext;
746  } else {
747  // Look for next sibling of a parent of iPos
748  // When going back up, parents have already been done except iPosTop
749  while ((iPos = m_aPos[iPos].iElemParent) != 0 && iPos != iPosTop)
750  if (m_aPos[iPos].iElemNext) {
751  iPos = m_aPos[iPos].iElemNext;
752  break;
753  }
754  }
755  bPosFirst = false;
756 
757  // Shift indexes at iPos
 // iPosTop only gets its end shifted, not its start. These two statements
 // also run once with iPos == 0 (entry m_aPos[0]) before the loop condition
 // ends the walk.
758  if (iPos != iPosTop) m_aPos[iPos].AdjustStart(nShift);
759  m_aPos[iPos].AdjustEnd(nShift);
760  }
761 }
762 
// Works out where a new element or node has to be placed.
//
// iPosParent: element that will contain the new node (0 = outside all
//             elements).
// iPosRel:    in:  element the new node is placed relative to (0 = none);
//             out: element the new node should be linked after (0 = it
//                  becomes the first child).
// nOffset:    in:  start offset of the current non-element node, only read
//                  when nLength is non-zero;
//             out: document offset at which to insert.
// nLength:    length of the current non-element node, 0 when positioned at
//             an element.
// nFlags:     bit 0 (value 1) = insert before instead of add after,
//             bit 1 (value 2) = honor whitespace (do not skip ahead to the
//             next '<').
763 void CMarkup::x_LocateNew(int iPosParent, int& iPosRel, int& nOffset,
764  int nLength, int nFlags) {
765  // Determine where to insert new element or node
766  //
767  bool bInsert = (nFlags & 1) ? true : false;
768  bool bHonorWhitespace = (nFlags & 2) ? true : false;
769 
770  std::string::size_type nStartL;
771  if (nLength) {
772  // Located at a non-element node
773  if (bInsert)
774  nStartL = nOffset;
775  else
776  nStartL = nOffset + nLength;
777  } else if (iPosRel) {
778  // Located at an element
779  if (bInsert) // precede iPosRel
780  nStartL = m_aPos[iPosRel].nStartL;
781  else // follow iPosRel
782  nStartL = m_aPos[iPosRel].nEndR + 1;
783  } else if (!iPosParent) {
784  // Outside of all elements
785  if (bInsert)
786  nStartL = 0;
787  else
788  nStartL = m_csDoc.length();
789  } else if (m_aPos[iPosParent].IsEmptyElement()) {
790  // Parent has no separate end tag, so split empty element
791  nStartL = m_aPos[iPosParent].nStartR;
792  } else {
793  if (bInsert) // after start tag
794  nStartL = m_aPos[iPosParent].nStartR + 1;
795  else // before end tag
796  nStartL = m_aPos[iPosParent].nEndL;
797  }
798 
799  // Go up to start of next node, unless its splitting an empty element
800  if (!bHonorWhitespace && !m_aPos[iPosParent].IsEmptyElement()) {
801  const char* szDoc = (const char*)m_csDoc.c_str();
802  int nChar = (int)nStartL;
 // Only move forward when what follows the whitespace is a tag or the end
 // of the document; text content keeps the original offset.
803  if (!x_FindAny(szDoc, nChar) || szDoc[nChar] == '<') nStartL = nChar;
804  }
805 
806  // Determine iPosBefore
807  int iPosBefore = 0;
808  if (iPosRel) {
809  if (bInsert) {
810  // Is iPosRel past first sibling?
811  int iPosPrev = m_aPos[iPosParent].iElemChild;
812  if (iPosPrev != iPosRel) {
813  // Find previous sibling of iPosRel
 // NOTE(review): this walk has no end-of-chain check, so it appears to
 // rely on iPosRel being a child of iPosParent - confirm with callers.
814  while (m_aPos[iPosPrev].iElemNext != iPosRel)
815  iPosPrev = m_aPos[iPosPrev].iElemNext;
816  iPosBefore = iPosPrev;
817  }
818  } else {
819  iPosBefore = iPosRel;
820  }
821  } else if (m_aPos[iPosParent].iElemChild) {
822  if (!bInsert) {
823  // Find last element under iPosParent
824  int iPosLast = m_aPos[iPosParent].iElemChild;
825  int iPosNext = iPosLast;
826  while (iPosNext) {
827  iPosLast = iPosNext;
828  iPosNext = m_aPos[iPosNext].iElemNext;
829  }
830  iPosBefore = iPosLast;
831  }
832  }
833 
 // Hand both results back through the in/out parameters.
834  nOffset = (int)nStartL;
835  iPosRel = iPosBefore;
836 }
837 
838 bool CMarkup::x_AddElem(const char* szName, const char* szValue, bool bInsert,
839  bool bAddChild) {
840  if (bAddChild) {
841  // Adding a child element under main position
842  if (!m_iPos) return false;
843  } else if (m_iPosParent == 0) {
844  // Adding root element
845  if (IsWellFormed()) return false;
846 
847  // Locate after any version and DTD
848  m_aPos[0].nEndL = (int)m_csDoc.length();
849  }
850 
851  // Locate where to add element relative to current node
852  int iPosParent, iPosBefore, nOffset = 0, nLength = 0;
853  if (bAddChild) {
854  iPosParent = m_iPos;
855  iPosBefore = m_iPosChild;
856  } else {
857  iPosParent = m_iPosParent;
858  iPosBefore = m_iPos;
859  }
860  int nFlags = bInsert ? 1 : 0;
861  x_LocateNew(iPosParent, iPosBefore, nOffset, nLength, nFlags);
862  // LocateNew: in case of an empty parent it finds the end of the start tag
863  // (sort of) in case of a non-empty parent it finds the char before the start
864  // of the end tag.
865 
866  // Find out the indent we need:
867  int nTopParent = iPosParent;
868  int nLevel = 0;
869  while (nTopParent) {
870  nTopParent = m_aPos[nTopParent].iElemParent;
871  nLevel++;
872  }
873  int nIndentChars = nLevel * mnIndent;
874  mtIndent[nIndentChars] = 0;
875 
876  bool bEmptyParent = m_aPos[iPosParent].IsEmptyElement();
877  if (bEmptyParent ||
878  m_aPos[iPosParent].nStartR + 1 == m_aPos[iPosParent].nEndL) {
879  nOffset += 2;
880  } else {
881  if ((nOffset < (int)(m_csDoc.length())) && (0 < nOffset) &&
882  (' ' == m_csDoc[nOffset - 1])) {
883  while ((0 < nOffset) && (' ' == m_csDoc[nOffset - 1])) --nOffset;
884  }
885  }
886 
887  // Create element and modify positions of affected elements
888  // If no szValue is specified, an empty element is created
889  // i.e. either <NAME>value</NAME> or <NAME/>
890  int iPos = x_GetFreePos();
891  m_aPos[iPos].nStartL = nOffset + nIndentChars;
892 
893  // Set links
894  m_aPos[iPos].iElemParent = iPosParent;
895  m_aPos[iPos].iElemChild = 0;
896  m_aPos[iPos].iElemNext = 0;
897  if (iPosBefore) {
898  // Link in after iPosBefore
899  m_aPos[iPos].iElemNext = m_aPos[iPosBefore].iElemNext;
900  m_aPos[iPosBefore].iElemNext = iPos;
901  } else {
902  // First child
903  m_aPos[iPos].iElemNext = m_aPos[iPosParent].iElemChild;
904  m_aPos[iPosParent].iElemChild = iPos;
905  }
906 
907  // Create string for insert
908  std::string csInsert;
909  int nLenName = (int)strlen(szName);
910  int nLenValue = szValue ? (int)strlen(szValue) : 0;
911  if (!nLenValue) {
912  // <NAME/> empty element
913  csInsert = mtIndent;
914  csInsert += "<";
915  csInsert += szName;
916  csInsert += "/>\r\n";
917  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 2;
918  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR - 1;
919  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + 1;
920  } else {
921  // <NAME>value</NAME>
922  std::string csValue = x_TextToDoc(szValue);
923  nLenValue = (int)csValue.length();
924  csInsert = mtIndent;
925  csInsert += "<";
926  csInsert += szName;
927  csInsert += ">";
928  csInsert += csValue;
929  csInsert += "</";
930  csInsert += szName;
931  csInsert += ">\r\n";
932  m_aPos[iPos].nStartR = m_aPos[iPos].nStartL + nLenName + 1;
933  m_aPos[iPos].nEndL = m_aPos[iPos].nStartR + nLenValue + 1;
934  m_aPos[iPos].nEndR = m_aPos[iPos].nEndL + nLenName + 2;
935  }
936  mtIndent[nIndentChars] = ' ';
937 
938  // Insert
939  int nReplace = 0, nLeft = m_aPos[iPos].nStartL;
940  if (bEmptyParent) {
941  std::string csParentTagName = x_GetTagName(iPosParent);
942  std::string csFormat;
943  csFormat = ">\r\n";
944  csFormat += csInsert;
945  mtIndent[nIndentChars - mnIndent] = 0;
946  csFormat += mtIndent;
947  mtIndent[nIndentChars - mnIndent] = ' ';
948  csFormat += "</";
949  csFormat += csParentTagName;
950  csInsert = csFormat;
951  nLeft = m_aPos[iPosParent].nStartR - 1;
952  nReplace = 1;
953  // x_Adjust is going to update all affected indexes by one amount
954  // This will satisfy all except the empty parent
955  // Here we pre-adjust for the empty parent
956  // The empty tag slash is removed
957  m_aPos[iPosParent].nStartR -= 1;
958  // For the newly created end tag, see the following example:
959  // <A/> (len 4) becomes <A><B/></A> (len 11)
960  // In x_Adjust everything will be adjusted 11 - 4 = 7
961  // But the nEndL of element A should only be adjusted 5
962  m_aPos[iPosParent].nEndL -= (int)(csParentTagName.length() + 1);
963  } else if (m_aPos[iPosParent].nStartR + 1 == m_aPos[iPosParent].nEndL) {
964  // Empty parent, but with an end tag following right after.
965  csInsert = "\r\n" + csInsert;
966  mtIndent[nIndentChars - mnIndent] = 0;
967  csInsert += mtIndent;
968  mtIndent[nIndentChars - mnIndent] = ' ';
969  nLeft = m_aPos[iPosParent].nStartR + 1;
970  } else {
971  nLeft -= nIndentChars;
972  }
973 
974  x_DocChange(nLeft, nReplace, csInsert);
975  x_Adjust(iPos, (int)csInsert.length() - nReplace);
976 
977  if (bAddChild)
978  x_SetPos(m_iPosParent, iPosParent, iPos);
979  else
980  x_SetPos(iPosParent, iPos, 0);
981  return true;
982 }
983 
984 std::string CMarkup::Format(const char* fmt, ...) {
985  using std::string;
986  using std::vector;
987 
988  string retStr("");
989 
990  if (NULL != fmt) {
991  va_list marker;
992 
993  // initialize variable arguments
994  va_start(marker, fmt);
995 
996  // Get formatted string length adding one for NULL
997 #ifdef _WIN32
998  size_t len = _vscprintf(fmt, marker) + 1;
999 #else
1000  va_list argcopy;
1001  va_copy(argcopy, marker);
1002  auto len = vsnprintf(NULL, 0, fmt, marker) + 1;
1003  va_end(argcopy);
1004 #endif
1005 
1006  // Create a char vector to hold the formatted string.
1007  vector<char> buffer(len, '\0');
1008 #ifdef _WIN32
1009  int nWritten = _vsnprintf_s(&buffer[0], buffer.size(), len, fmt, marker);
1010 #else
1011  int nWritten = vsnprintf(&buffer[0], len, fmt, marker);
1012 #endif
1013  if (nWritten > 0) {
1014  retStr = &buffer[0];
1015  }
1016 
1017  // Reset variable arguments
1018  va_end(marker);
1019  }
1020 
1021  return retStr;
1022 }
1023 
1024 std::string CMarkup::Mid(const std::string& tStr, int nFirst) const {
1025  return Mid(tStr, nFirst, (int)tStr.length() - nFirst);
1026 }
1027 
1028 std::string CMarkup::Mid(const std::string& tStr, int nFirst,
1029  int nCount) const {
1030  if (nFirst < 0) {
1031  nFirst = 0;
1032  }
1033  if (nCount < 0) {
1034  nCount = 0;
1035  }
1036 
1037  int nSize = static_cast<int>(tStr.size());
1038 
1039  if (nFirst + nCount > nSize) {
1040  nCount = nSize - nFirst;
1041  }
1042 
1043  if (nFirst > nSize) {
1044  std::string tStrEmpty;
1045  return tStrEmpty;
1046  }
1047 
1048  assert(nFirst >= 0);
1049  assert(nFirst + nCount <= nSize);
1050 
1051  return tStr.substr(nFirst, nCount);
1052 }
1053 
1054 char* CMarkup::GetBuffer(std::string& tStr, int nMinLen) const {
1055  if (static_cast<int>(tStr.size()) < nMinLen) {
1056  tStr.resize(nMinLen);
1057  }
1058 
1059  return const_cast<char*>(
1060  tStr.c_str()); // tStr.empty() ? const_cast<char*>(tStr.c_str()) :
1061  // &(tStr.at(0));
1062 }
1063 
1064 void CMarkup::ReleaseBuffer(std::string& tStr, int nNewLen) const {
1065  tStr.resize(nNewLen > -1 ? nNewLen : strlen(tStr.c_str()));
1066 }
1067 
1068 bool CMarkup::TokenPos::Match(const char* szName) const {
1069  int nLen = nR - nL + 1;
1070  return ((strncmp(&szDoc[nL], szName, nLen) == 0) &&
1071  (szName[nLen] == '\0' || strchr(" =/[", szName[nLen])));
1072 }
CMarkup
Definition: Markup.h:25
CMarkup::x_GetData
std::string x_GetData(int iPos) const
Definition: Markup.cpp:572
CMarkup::x_FindElem
int x_FindElem(int iPosParent, int iPos, const char *szPath)
Definition: Markup.cpp:377
CMarkup::m_aPos
std::vector< ElemPos > m_aPos
Definition: Markup.h:136
CMarkup::x_ParseError
int x_ParseError(const char *szError, const char *szName=NULL)
Definition: Markup.cpp:175
CMarkup::x_ParseElem
int x_ParseElem(int iPos)
Definition: Markup.cpp:184
CMarkup::mnIndent
int mnIndent
Definition: Markup.h:197
CMarkup::FindChildElem
bool FindChildElem(const char *szName=NULL)
Definition: Markup.cpp:101
CMarkup::MNT_COMMENT
@ MNT_COMMENT
Definition: Markup.h:90
CMarkup::MNT_TEXT
@ MNT_TEXT
Definition: Markup.h:86
CMarkup::x_FindChar
static bool x_FindChar(const char *szDoc, int &nChar, char c)
Definition: Markup.cpp:295
CMarkup::m_nNodeType
int m_nNodeType
Definition: Markup.h:141
CMarkup::TokenPos::Match
bool Match(const char *szName) const
Definition: Markup.cpp:1068
CMarkup::x_ReleasePos
int x_ReleasePos()
Definition: Markup.cpp:166
CMarkup::x_FindAny
static bool x_FindAny(const char *szDoc, int &nChar)
Definition: Markup.cpp:310
CMarkup::x_GetTagName
std::string x_GetTagName(int iPos) const
Definition: Markup.cpp:476
CMarkup::OutOfElem
bool OutOfElem()
Definition: Markup.cpp:143
CMarkup::MNT_PROCESSING_INSTRUCTION
@ MNT_PROCESSING_INSTRUCTION
Definition: Markup.h:89
CMarkup::x_FindAttrib
bool x_FindAttrib(TokenPos &token, const char *szAttrib=NULL) const
Definition: Markup.cpp:486
CMarkup::x_DocChange
void x_DocChange(int nLeft, int nReplace, const std::string &csInsert)
Definition: Markup.cpp:692
CMarkup::x_SetPos
void x_SetPos(int iPosParent, int iPos, int iPosChild)
Definition: Markup.h:163
CMarkup::x_ParseNode
int x_ParseNode(TokenPos &token)
Definition: Markup.cpp:401
CMarkup::m_csDoc
std::string m_csDoc
Definition: Markup.h:101
CMarkup::m_iPos
int m_iPos
Definition: Markup.h:138
CMarkup::GetTagName
std::string GetTagName() const
Definition: Markup.cpp:119
CMarkup::x_GetToken
std::string x_GetToken(const TokenPos &token) const
Definition: Markup.cpp:368
CMarkup::MNT_ELEMENT
@ MNT_ELEMENT
Definition: Markup.h:85
CMarkup::m_iPosChild
int m_iPosChild
Definition: Markup.h:139
CMarkup::x_SetAttrib
bool x_SetAttrib(int iPos, const char *szAttrib, const char *szValue)
Definition: Markup.cpp:533
CMarkup::operator=
void operator=(const CMarkup &markup)
Definition: Markup.cpp:29
CMarkup::m_csError
std::string m_csError
Definition: Markup.h:102
Markup.h
CMarkup::x_GetAttrib
std::string x_GetAttrib(int iPos, const char *szAttrib) const
Definition: Markup.cpp:519
MARKUP_SETDEBUGSTATE
#define MARKUP_SETDEBUGSTATE
Definition: Markup.h:22
CMarkup::x_GetFreePos
int x_GetFreePos()
Definition: Markup.cpp:156
CMarkup::TokenPos
Definition: Markup.h:143
CMarkup::x_AddElem
bool x_AddElem(const char *szName, const char *szValue, bool bInsert, bool bAddChild)
Definition: Markup.cpp:838
CMarkup::TokenPos::nR
int nR
Definition: Markup.h:157
CMarkup::TokenPos::nL
int nL
Definition: Markup.h:156
CMarkup::MNT_CDATA_SECTION
@ MNT_CDATA_SECTION
Definition: Markup.h:88
CMarkup::x_Adjust
void x_Adjust(int iPos, int nShift, bool bAfterPos=false)
Definition: Markup.cpp:721
CMarkup::MNT_WHITESPACE
@ MNT_WHITESPACE
Definition: Markup.h:87
CMarkup::IsWellFormed
bool IsWellFormed()
Definition: Markup.cpp:82
CMarkup::MNT_DOCUMENT_TYPE
@ MNT_DOCUMENT_TYPE
Definition: Markup.h:91
CMarkup::TokenPos::bIsString
bool bIsString
Definition: Markup.h:160
CMarkup::x_LocateNew
void x_LocateNew(int iPosParent, int &iPosRel, int &nOffset, int nLength, int nFlags)
Definition: Markup.cpp:763
CMarkup::m_iPosFree
int m_iPosFree
Definition: Markup.h:140
CMarkup::x_FindToken
static bool x_FindToken(TokenPos &token)
Definition: Markup.cpp:318
CMarkup::mtIndent
char mtIndent[1000]
Definition: Markup.h:196
CMarkup::TokenPos::szDoc
const char * szDoc
Definition: Markup.h:159
sot_talos_balance.test.test_base_estimator.c
c
Definition: test_base_estimator.py:23
CMarkup::m_iPosParent
int m_iPosParent
Definition: Markup.h:137
CMarkup::ResetPos
void ResetPos()
Definition: Markup.h:69
CMarkup::SetDoc
bool SetDoc(const char *szDoc)
Definition: Markup.cpp:41
CMarkup::IntoElem
bool IntoElem()
Definition: Markup.cpp:127
CMarkup::x_TextToDoc
std::string x_TextToDoc(const char *szText, bool bAttrib=false) const
Definition: Markup.cpp:593
CMarkup::FindElem
bool FindElem(const char *szName=NULL)
Definition: Markup.cpp:87
CMarkup::Format
static std::string Format(const char *fmt,...)
Definition: Markup.cpp:984
CMarkup::TokenPos::nNext
int nNext
Definition: Markup.h:158
CMarkup::x_TextFromDoc
std::string x_TextFromDoc(int nLeft, int nRight) const
Definition: Markup.cpp:643
CMarkup::SetIndent
void SetIndent(int nIndent=4)
Definition: Markup.cpp:27