OpenCPN Partial API docs
Loading...
Searching...
No Matches
pugixml.cpp
1
14#ifndef SOURCE_PUGIXML_CPP
15#define SOURCE_PUGIXML_CPP
16
17#include "pugixml.hpp"
18
19#include <stdlib.h>
20#include <stdio.h>
21#include <string.h>
22#include <assert.h>
23#include <limits.h>
24
25#ifdef PUGIXML_WCHAR_MODE
26#include <wchar.h>
27#endif
28
29#ifndef PUGIXML_NO_XPATH
30#include <math.h>
31#include <float.h>
32#ifdef PUGIXML_NO_EXCEPTIONS
33#include <setjmp.h>
34#endif
35#endif
36
37#ifndef PUGIXML_NO_STL
38#include <istream>
39#include <ostream>
40#include <string>
41#endif
42
43// For placement new
44#include <new>
45
46#ifdef _MSC_VER
47#pragma warning(push)
48#pragma warning(disable : 4127) // conditional expression is constant
49#pragma warning( \
50 disable : 4324) // structure was padded due to __declspec(align())
51#pragma warning(disable : 4611) // interaction between '_setjmp' and C++ object
52 // destruction is non-portable
53#pragma warning(disable : 4702) // unreachable code
54#pragma warning(disable : 4996) // this function or variable may be unsafe
55#pragma warning(disable : 4793) // function compiled as native: presence of
56 // '_setjmp' makes a function unmanaged
57#endif
58
59#ifdef __INTEL_COMPILER
60#pragma warning(disable : 177) // function was declared but never referenced
61#pragma warning(disable : 279) // controlling expression is constant
62#pragma warning(disable : 1478 1786) // function was declared "deprecated"
63#pragma warning( \
64 disable : 1684) // conversion from pointer to same-sized integral type
65#endif
66
67#if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
68#pragma warn - 8080 // symbol is declared but never used; disabling this inside
69 // push/pop bracket does not make the warning go away
70#endif
71
72#ifdef __BORLANDC__
73#pragma option push
74#pragma warn - 8008 // condition is always false
75#pragma warn - 8066 // unreachable code
76#endif
77
78#ifdef __SNC__
79// Using diag_push/diag_pop does not disable the warnings inside templates due
80// to a compiler bug
81#pragma diag_suppress = 178 // function was declared but never referenced
82#pragma diag_suppress = 237 // controlling expression is constant
83#endif
84
85// Inlining controls
86#if defined(_MSC_VER) && _MSC_VER >= 1300
87#define PUGI__NO_INLINE __declspec(noinline)
88#elif defined(__GNUC__)
89#define PUGI__NO_INLINE __attribute__((noinline))
90#else
91#define PUGI__NO_INLINE
92#endif
93
94// Branch weight controls
95#if defined(__GNUC__)
96#define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
97#else
98#define PUGI__UNLIKELY(cond) (cond)
99#endif
100
101// Simple static assertion
102#define PUGI__STATIC_ASSERT(cond) \
103 { \
104 static const char condition_failed[(cond) ? 1 : -1] = {0}; \
105 (void)condition_failed[0]; \
106 }
107
108// Digital Mars C++ bug workaround for passing char loaded from memory via stack
109#ifdef __DMC__
110#define PUGI__DMC_VOLATILE volatile
111#else
112#define PUGI__DMC_VOLATILE
113#endif
114
115// Borland C++ bug workaround for not defining ::memcpy depending on header
116// include order (can't always use std::memcpy because some compilers don't have
117// it at all)
118#if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
119using std::memcpy;
120using std::memmove;
121using std::memset;
122#endif
123
124// Some MinGW versions have headers that erroneously omit
125// LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions in strict ANSI mode
126#if defined(PUGIXML_HAS_LONG_LONG) && defined(__MINGW32__) && \
127 defined(__STRICT_ANSI__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && \
128 !defined(ULLONG_MAX)
129#define LLONG_MAX 9223372036854775807LL
130#define LLONG_MIN (-LLONG_MAX - 1)
131#define ULLONG_MAX (2ULL * LLONG_MAX + 1)
132#endif
133
134// In some environments MSVC is a compiler but the CRT lacks certain
135// MSVC-specific features
136#if defined(_MSC_VER) && !defined(__S3E__)
137#define PUGI__MSVC_CRT_VERSION _MSC_VER
138#endif
139
140#ifdef PUGIXML_HEADER_ONLY
141#define PUGI__NS_BEGIN \
142 namespace pugi { \
143 namespace impl {
144#define PUGI__NS_END \
145 } \
146 }
147#define PUGI__FN inline
148#define PUGI__FN_NO_INLINE inline
149#else
150#if defined(_MSC_VER) && \
151 _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous
152 // namespaces inside namespaces
153#define PUGI__NS_BEGIN \
154 namespace pugi { \
155 namespace impl {
156#define PUGI__NS_END \
157 } \
158 }
159#else
160#define PUGI__NS_BEGIN \
161 namespace pugi { \
162 namespace impl { \
163 namespace {
164#define PUGI__NS_END \
165 } \
166 } \
167 }
168#endif
169#define PUGI__FN
170#define PUGI__FN_NO_INLINE PUGI__NO_INLINE
171#endif
172
173// uintptr_t
174#if (defined(_MSC_VER) && _MSC_VER < 1600) || \
175 (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
176namespace pugi {
177#ifndef _UINTPTR_T_DEFINED
178typedef size_t uintptr_t;
179#endif
180
181typedef unsigned __int8 uint8_t;
182typedef unsigned __int16 uint16_t;
183typedef unsigned __int32 uint32_t;
184} // namespace pugi
185#else
186#include <stdint.h>
187#endif
188
189// Memory allocation
190PUGI__NS_BEGIN
191PUGI__FN void* default_allocate(size_t size) { return malloc(size); }
192
193PUGI__FN void default_deallocate(void* ptr) { free(ptr); }
194
195template <typename T>
197 static allocation_function allocate;
198 static deallocation_function deallocate;
199};
200
201// Global allocation functions are stored in class statics so that in header
202// mode linker deduplicates them Without a template<> we'll get multiple
203// definitions of the same static
204template <typename T>
206 default_allocate;
207template <typename T>
209 default_deallocate;
210
212PUGI__NS_END
213
214// String utilities
215PUGI__NS_BEGIN
216// Get string length
217PUGI__FN size_t strlength(const char_t* s) {
218 assert(s);
219
220#ifdef PUGIXML_WCHAR_MODE
221 return wcslen(s);
222#else
223 return strlen(s);
224#endif
225}
226
227// Compare two strings
228PUGI__FN bool strequal(const char_t* src, const char_t* dst) {
229 assert(src && dst);
230
231#ifdef PUGIXML_WCHAR_MODE
232 return wcscmp(src, dst) == 0;
233#else
234 return strcmp(src, dst) == 0;
235#endif
236}
237
238// Compare lhs with [rhs_begin, rhs_end)
239PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs,
240 size_t count) {
241 for (size_t i = 0; i < count; ++i)
242 if (lhs[i] != rhs[i]) return false;
243
244 return lhs[count] == 0;
245}
246
247// Get length of wide string, even if CRT lacks wide character support
248PUGI__FN size_t strlength_wide(const wchar_t* s) {
249 assert(s);
250
251#ifdef PUGIXML_WCHAR_MODE
252 return wcslen(s);
253#else
254 const wchar_t* end = s;
255 while (*end) end++;
256 return static_cast<size_t>(end - s);
257#endif
258}
259PUGI__NS_END
260
261// auto_ptr-like object for exception recovery
262PUGI__NS_BEGIN
// Scope guard that owns a raw pointer together with the function used to
// dispose of it. Unless release() is called first, the destructor passes the
// pointer to the deleter.
// (The 'struct auto_deleter {' line was missing from this listing; the name
// is taken from the constructor and destructor below.)
template <typename T>
struct auto_deleter {
  typedef void (*D)(T*);

  T* data;    // guarded object; 0 once released
  D deleter;  // called with data on destruction

  auto_deleter(T* data_, D deleter_) : data(data_), deleter(deleter_) {}

  ~auto_deleter() {
    if (data) deleter(data);
  }

  // Gives up ownership: returns the pointer and disarms the destructor.
  T* release() {
    T* result = data;
    data = 0;
    return result;
  }
};
282PUGI__NS_END
283
284#ifdef PUGIXML_COMPACT
285PUGI__NS_BEGIN
286class compact_hash_table {
287public:
288 compact_hash_table() : _items(0), _capacity(0), _count(0) {}
289
290 void clear() {
291 if (_items) {
292 xml_memory::deallocate(_items);
293 _items = 0;
294 _capacity = 0;
295 _count = 0;
296 }
297 }
298
299 void** find(const void* key) {
300 assert(key);
301
302 if (_capacity == 0) return 0;
303
304 size_t hashmod = _capacity - 1;
305 size_t bucket = hash(key) & hashmod;
306
307 for (size_t probe = 0; probe <= hashmod; ++probe) {
308 item_t& probe_item = _items[bucket];
309
310 if (probe_item.key == key) return &probe_item.value;
311
312 if (probe_item.key == 0) return 0;
313
314 // hash collision, quadratic probing
315 bucket = (bucket + probe + 1) & hashmod;
316 }
317
318 assert(false && "Hash table is full");
319 return 0;
320 }
321
322 void** insert(const void* key) {
323 assert(key);
324 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
325
326 size_t hashmod = _capacity - 1;
327 size_t bucket = hash(key) & hashmod;
328
329 for (size_t probe = 0; probe <= hashmod; ++probe) {
330 item_t& probe_item = _items[bucket];
331
332 if (probe_item.key == 0) {
333 probe_item.key = key;
334 _count++;
335 return &probe_item.value;
336 }
337
338 if (probe_item.key == key) return &probe_item.value;
339
340 // hash collision, quadratic probing
341 bucket = (bucket + probe + 1) & hashmod;
342 }
343
344 assert(false && "Hash table is full");
345 return 0;
346 }
347
348 bool reserve() {
349 if (_count + 16 >= _capacity - _capacity / 4) return rehash();
350
351 return true;
352 }
353
354private:
355 struct item_t {
356 const void* key;
357 void* value;
358 };
359
360 item_t* _items;
361 size_t _capacity;
362
363 size_t _count;
364
365 bool rehash();
366
367 static unsigned int hash(const void* key) {
368 unsigned int h =
369 static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
370
371 // MurmurHash3 32-bit finalizer
372 h ^= h >> 16;
373 h *= 0x85ebca6bu;
374 h ^= h >> 13;
375 h *= 0xc2b2ae35u;
376 h ^= h >> 16;
377
378 return h;
379 }
380};
381
382PUGI__FN_NO_INLINE bool compact_hash_table::rehash() {
383 compact_hash_table rt;
384 rt._capacity = (_capacity == 0) ? 32 : _capacity * 2;
385 rt._items =
386 static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * rt._capacity));
387
388 if (!rt._items) return false;
389
390 memset(rt._items, 0, sizeof(item_t) * rt._capacity);
391
392 for (size_t i = 0; i < _capacity; ++i)
393 if (_items[i].key) *rt.insert(_items[i].key) = _items[i].value;
394
395 if (_items) xml_memory::deallocate(_items);
396
397 _capacity = rt._capacity;
398 _items = rt._items;
399
400 assert(_count == rt._count);
401
402 return true;
403}
404
405PUGI__NS_END
406#endif
407
408PUGI__NS_BEGIN
// Alignment, in bytes, applied to string blocks handed out by the allocator.
#ifndef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#else
static const uintptr_t xml_memory_block_alignment = 4;
#endif

// Header flag bits: bits 0-3 hold the node type, bits 4-6 are ownership and
// sharing flags.
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// Combined masks for string uniqueness checks: "allocated" or "shared".
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask =
    xml_memory_page_contents_shared_mask | xml_memory_page_name_allocated_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask =
    xml_memory_page_contents_shared_mask | xml_memory_page_value_allocated_mask;
426
427#ifdef PUGIXML_COMPACT
428#define PUGI__GETHEADER_IMPL(object, page, flags) // unused
429#define PUGI__GETPAGE_IMPL(header) (header).get_page()
430#else
431#define PUGI__GETHEADER_IMPL(object, page, flags) \
432 (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | \
433 (flags))
434// this macro casts pointers through void* to avoid 'cast increases required
435// alignment of target type' warnings
436#define PUGI__GETPAGE_IMPL(header) \
437 static_cast<impl::xml_memory_page*>( \
438 const_cast<void*>(static_cast<const void*>( \
439 reinterpret_cast<const char*>(&header) - (header >> 8))))
440#endif
441
442#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
443#define PUGI__NODETYPE(n) \
444 static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
445
446struct xml_allocator;
447
449 static xml_memory_page* construct(void* memory) {
450 xml_memory_page* result = static_cast<xml_memory_page*>(memory);
451
452 result->allocator = 0;
453 result->prev = 0;
454 result->next = 0;
455 result->busy_size = 0;
456 result->freed_size = 0;
457
458#ifdef PUGIXML_COMPACT
459 result->compact_string_base = 0;
460 result->compact_shared_parent = 0;
461 result->compact_page_marker = 0;
462#endif
463
464 return result;
465 }
466
467 xml_allocator* allocator;
468
469 xml_memory_page* prev;
470 xml_memory_page* next;
471
472 size_t busy_size;
473 size_t freed_size;
474
475#ifdef PUGIXML_COMPACT
476 char_t* compact_string_base;
477 void* compact_shared_parent;
478 uint32_t* compact_page_marker;
479#endif
480};
481
482static const size_t xml_memory_page_size =
483#ifdef PUGIXML_MEMORY_PAGE_SIZE
484 (PUGIXML_MEMORY_PAGE_SIZE)
485#else
486 32768
487#endif
488 - sizeof(xml_memory_page);
489
// Bookkeeping stored immediately before every string allocated through
// xml_allocator::allocate_string. Both fields are in units of
// xml_memory_block_alignment.
// (The struct declaration line was missing from this listing; the name is
// taken from its uses in allocate_string / deallocate_string.)
struct xml_memory_string_header {
  uint16_t page_offset;  // offset from page->data
  uint16_t full_size;    // 0 if string occupies whole page
};
494
497 : _root(root), _busy_size(root->busy_size) {
498#ifdef PUGIXML_COMPACT
499 _hash = 0;
500#endif
501 }
502
503 xml_memory_page* allocate_page(size_t data_size) {
504 size_t size = sizeof(xml_memory_page) + data_size;
505
506 // allocate block with some alignment, leaving memory for worst-case padding
507 void* memory = xml_memory::allocate(size);
508 if (!memory) return 0;
509
510 // prepare page structure
511 xml_memory_page* page = xml_memory_page::construct(memory);
512 assert(page);
513
514 page->allocator = _root->allocator;
515
516 return page;
517 }
518
519 static void deallocate_page(xml_memory_page* page) {
520 xml_memory::deallocate(page);
521 }
522
523 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
524
525 void* allocate_memory(size_t size, xml_memory_page*& out_page) {
526 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
527 return allocate_memory_oob(size, out_page);
528
529 void* buf =
530 reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
531
532 _busy_size += size;
533
534 out_page = _root;
535
536 return buf;
537 }
538
539#ifdef PUGIXML_COMPACT
540 void* allocate_object(size_t size, xml_memory_page*& out_page) {
541 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
542 if (!result) return 0;
543
544 // adjust for marker
545 ptrdiff_t offset = static_cast<char*>(result) -
546 reinterpret_cast<char*>(out_page->compact_page_marker);
547
548 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >=
549 256 * xml_memory_block_alignment)) {
550 // insert new marker
551 uint32_t* marker = static_cast<uint32_t*>(result);
552
553 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) -
554 reinterpret_cast<char*>(out_page));
555 out_page->compact_page_marker = marker;
556
557 // since we don't reuse the page space until we reallocate it, we can just
558 // pretend that we freed the marker block this will make sure
559 // deallocate_memory correctly tracks the size
560 out_page->freed_size += sizeof(uint32_t);
561
562 return marker + 1;
563 } else {
564 // roll back uint32_t part
565 _busy_size -= sizeof(uint32_t);
566
567 return result;
568 }
569 }
570#else
571 void* allocate_object(size_t size, xml_memory_page*& out_page) {
572 return allocate_memory(size, out_page);
573 }
574#endif
575
576 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) {
577 if (page == _root) page->busy_size = _busy_size;
578
579 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) &&
580 ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) +
581 page->busy_size);
582 (void)!ptr;
583
584 page->freed_size += size;
585 assert(page->freed_size <= page->busy_size);
586
587 if (page->freed_size == page->busy_size) {
588 if (page->next == 0) {
589 assert(_root == page);
590
591 // top page freed, just reset sizes
592 page->busy_size = 0;
593 page->freed_size = 0;
594
595#ifdef PUGIXML_COMPACT
596 // reset compact state to maximize efficiency
597 page->compact_string_base = 0;
598 page->compact_shared_parent = 0;
599 page->compact_page_marker = 0;
600#endif
601
602 _busy_size = 0;
603 } else {
604 assert(_root != page);
605 assert(page->prev);
606
607 // remove from the list
608 page->prev->next = page->next;
609 page->next->prev = page->prev;
610
611 // deallocate
612 deallocate_page(page);
613 }
614 }
615 }
616
617 char_t* allocate_string(size_t length) {
618 static const size_t max_encoded_offset =
619 (1 << 16) * xml_memory_block_alignment;
620
621 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
622
623 // allocate memory for string and header block
624 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
625
626 // round size up to block alignment boundary
627 size_t full_size = (size + (xml_memory_block_alignment - 1)) &
628 ~(xml_memory_block_alignment - 1);
629
630 xml_memory_page* page;
632 allocate_memory(full_size, page));
633
634 if (!header) return 0;
635
636 // setup header
637 ptrdiff_t page_offset = reinterpret_cast<char*>(header) -
638 reinterpret_cast<char*>(page) -
639 sizeof(xml_memory_page);
640
641 assert(page_offset % xml_memory_block_alignment == 0);
642 assert(page_offset >= 0 &&
643 static_cast<size_t>(page_offset) < max_encoded_offset);
644 header->page_offset = static_cast<uint16_t>(
645 static_cast<size_t>(page_offset) / xml_memory_block_alignment);
646
647 // full_size == 0 for large strings that occupy the whole page
648 assert(full_size % xml_memory_block_alignment == 0);
649 assert(full_size < max_encoded_offset ||
650 (page->busy_size == full_size && page_offset == 0));
651 header->full_size = static_cast<uint16_t>(
652 full_size < max_encoded_offset ? full_size / xml_memory_block_alignment
653 : 0);
654
655 // round-trip through void* to avoid 'cast increases required alignment of
656 // target type' warning header is guaranteed a pointer-sized alignment,
657 // which should be enough for char_t
658 return static_cast<char_t*>(static_cast<void*>(header + 1));
659 }
660
661 void deallocate_string(char_t* string) {
662 // this function casts pointers through void* to avoid 'cast increases
663 // required alignment of target type' warnings we're guaranteed the proper
664 // (pointer-sized) alignment on the input string if it was allocated via
665 // allocate_string
666
667 // get header
669 static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
670 assert(header);
671
672 // deallocate
673 size_t page_offset = sizeof(xml_memory_page) +
674 header->page_offset * xml_memory_block_alignment;
675 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(
676 static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
677
678 // if full_size == 0 then this string occupies the whole page
679 size_t full_size = header->full_size == 0
680 ? page->busy_size
681 : header->full_size * xml_memory_block_alignment;
682
683 deallocate_memory(header, full_size, page);
684 }
685
686 bool reserve() {
687#ifdef PUGIXML_COMPACT
688 return _hash->reserve();
689#else
690 return true;
691#endif
692 }
693
694 xml_memory_page* _root;
695 size_t _busy_size;
696
697#ifdef PUGIXML_COMPACT
698 compact_hash_table* _hash;
699#endif
700};
701
702PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(
703 size_t size, xml_memory_page*& out_page) {
704 const size_t large_allocation_threshold = xml_memory_page_size / 4;
705
706 xml_memory_page* page = allocate_page(
707 size <= large_allocation_threshold ? xml_memory_page_size : size);
708 out_page = page;
709
710 if (!page) return 0;
711
712 if (size <= large_allocation_threshold) {
713 _root->busy_size = _busy_size;
714
715 // insert page at the end of linked list
716 page->prev = _root;
717 _root->next = page;
718 _root = page;
719
720 _busy_size = size;
721 } else {
722 // insert page before the end of linked list, so that it is deleted as soon
723 // as possible the last page is not deleted even if it's empty (see
724 // deallocate_memory)
725 assert(_root->prev);
726
727 page->prev = _root->prev;
728 page->next = _root;
729
730 _root->prev->next = page;
731 _root->prev = page;
732
733 page->busy_size = size;
734 }
735
736 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
737}
738PUGI__NS_END
739
740#ifdef PUGIXML_COMPACT
741PUGI__NS_BEGIN
742static const uintptr_t compact_alignment_log2 = 2;
743static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
744
745class compact_header {
746public:
747 compact_header(xml_memory_page* page, unsigned int flags) {
748 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
749
750 ptrdiff_t offset = (reinterpret_cast<char*>(this) -
751 reinterpret_cast<char*>(page->compact_page_marker));
752 assert(offset % compact_alignment == 0 &&
753 static_cast<uintptr_t>(offset) < 256 * compact_alignment);
754
755 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
756 _flags = static_cast<unsigned char>(flags);
757 }
758
759 void operator&=(uintptr_t mod) { _flags &= static_cast<unsigned char>(mod); }
760
761 void operator|=(uintptr_t mod) { _flags |= static_cast<unsigned char>(mod); }
762
763 uintptr_t operator&(uintptr_t mod) const { return _flags & mod; }
764
765 xml_memory_page* get_page() const {
766 // round-trip through void* to silence 'cast increases required alignment of
767 // target type' warnings
768 const char* page_marker =
769 reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
770 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(
771 static_cast<const void*>(page_marker));
772
773 return const_cast<xml_memory_page*>(
774 reinterpret_cast<const xml_memory_page*>(
775 static_cast<const void*>(page)));
776 }
777
778private:
779 unsigned char _page;
780 unsigned char _flags;
781};
782
783PUGI__FN xml_memory_page* compact_get_page(const void* object,
784 int header_offset) {
785 const compact_header* header = reinterpret_cast<const compact_header*>(
786 static_cast<const char*>(object) - header_offset);
787
788 return header->get_page();
789}
790
791template <int header_offset, typename T>
792PUGI__FN_NO_INLINE T* compact_get_value(const void* object) {
793 return static_cast<T*>(
794 *compact_get_page(object, header_offset)->allocator->_hash->find(object));
795}
796
797template <int header_offset, typename T>
798PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) {
799 *compact_get_page(object, header_offset)->allocator->_hash->insert(object) =
800 value;
801}
802
803template <typename T, int header_offset, int start = -126>
804class compact_pointer {
805public:
806 compact_pointer() : _data(0) {}
807
808 void operator=(const compact_pointer& rhs) { *this = rhs + 0; }
809
810 void operator=(T* value) {
811 if (value) {
812 // value is guaranteed to be compact-aligned; 'this' is not
813 // our decoding is based on 'this' aligned to compact alignment downwards
814 // (see operator T*) so for negative offsets (e.g. -3) we need to adjust
815 // the diff by compact_alignment - 1 to compensate for arithmetic shift
816 // rounding for negative values
817 ptrdiff_t diff =
818 reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
819 ptrdiff_t offset =
820 ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) -
821 start;
822
823 if (static_cast<uintptr_t>(offset) <= 253)
824 _data = static_cast<unsigned char>(offset + 1);
825 else {
826 compact_set_value<header_offset>(this, value);
827
828 _data = 255;
829 }
830 } else
831 _data = 0;
832 }
833
834 operator T*() const {
835 if (_data) {
836 if (_data < 255) {
837 uintptr_t base =
838 reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
839
840 return reinterpret_cast<T*>(
841 base + ((_data - 1 + start) << compact_alignment_log2));
842 } else
843 return compact_get_value<header_offset, T>(this);
844 } else
845 return 0;
846 }
847
848 T* operator->() const { return *this; }
849
850private:
851 unsigned char _data;
852};
853
854template <typename T, int header_offset>
855class compact_pointer_parent {
856public:
857 compact_pointer_parent() : _data(0) {}
858
859 void operator=(const compact_pointer_parent& rhs) { *this = rhs + 0; }
860
861 void operator=(T* value) {
862 if (value) {
863 // value is guaranteed to be compact-aligned; 'this' is not
864 // our decoding is based on 'this' aligned to compact alignment downwards
865 // (see operator T*) so for negative offsets (e.g. -3) we need to adjust
866 // the diff by compact_alignment - 1 to compensate for arithmetic shift
867 // behavior for negative values
868 ptrdiff_t diff =
869 reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
870 ptrdiff_t offset =
871 ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) +
872 65533;
873
874 if (static_cast<uintptr_t>(offset) <= 65533) {
875 _data = static_cast<unsigned short>(offset + 1);
876 } else {
877 xml_memory_page* page = compact_get_page(this, header_offset);
878
879 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
880 page->compact_shared_parent = value;
881
882 if (page->compact_shared_parent == value) {
883 _data = 65534;
884 } else {
885 compact_set_value<header_offset>(this, value);
886
887 _data = 65535;
888 }
889 }
890 } else {
891 _data = 0;
892 }
893 }
894
895 operator T*() const {
896 if (_data) {
897 if (_data < 65534) {
898 uintptr_t base =
899 reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
900
901 return reinterpret_cast<T*>(
902 base + ((_data - 1 - 65533) << compact_alignment_log2));
903 } else if (_data == 65534)
904 return static_cast<T*>(
905 compact_get_page(this, header_offset)->compact_shared_parent);
906 else
907 return compact_get_value<header_offset, T>(this);
908 } else
909 return 0;
910 }
911
912 T* operator->() const { return *this; }
913
914private:
915 uint16_t _data;
916};
917
918template <int header_offset, int base_offset>
919class compact_string {
920public:
921 compact_string() : _data(0) {}
922
923 void operator=(const compact_string& rhs) { *this = rhs + 0; }
924
925 void operator=(char_t* value) {
926 if (value) {
927 xml_memory_page* page = compact_get_page(this, header_offset);
928
929 if (PUGI__UNLIKELY(page->compact_string_base == 0))
930 page->compact_string_base = value;
931
932 ptrdiff_t offset = value - page->compact_string_base;
933
934 if (static_cast<uintptr_t>(offset) < (65535 << 7)) {
935 // round-trip through void* to silence 'cast increases required
936 // alignment of target type' warnings
937 uint16_t* base = reinterpret_cast<uint16_t*>(
938 static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
939
940 if (*base == 0) {
941 *base = static_cast<uint16_t>((offset >> 7) + 1);
942 _data = static_cast<unsigned char>((offset & 127) + 1);
943 } else {
944 ptrdiff_t remainder = offset - ((*base - 1) << 7);
945
946 if (static_cast<uintptr_t>(remainder) <= 253) {
947 _data = static_cast<unsigned char>(remainder + 1);
948 } else {
949 compact_set_value<header_offset>(this, value);
950
951 _data = 255;
952 }
953 }
954 } else {
955 compact_set_value<header_offset>(this, value);
956
957 _data = 255;
958 }
959 } else {
960 _data = 0;
961 }
962 }
963
964 operator char_t*() const {
965 if (_data) {
966 if (_data < 255) {
967 xml_memory_page* page = compact_get_page(this, header_offset);
968
969 // round-trip through void* to silence 'cast increases required
970 // alignment of target type' warnings
971 const uint16_t* base =
972 reinterpret_cast<const uint16_t*>(static_cast<const void*>(
973 reinterpret_cast<const char*>(this) - base_offset));
974 assert(*base);
975
976 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
977
978 return page->compact_string_base + offset;
979 } else {
980 return compact_get_value<header_offset, char_t>(this);
981 }
982 } else
983 return 0;
984 }
985
986private:
987 unsigned char _data;
988};
989PUGI__NS_END
990#endif
991
992#ifdef PUGIXML_COMPACT
993namespace pugi {
994struct xml_attribute_struct {
995 xml_attribute_struct(impl::xml_memory_page* page)
996 : header(page, 0), namevalue_base(0) {
997 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
998 }
999
1000 impl::compact_header header;
1001
1002 uint16_t namevalue_base;
1003
1004 impl::compact_string<4, 2> name;
1005 impl::compact_string<5, 3> value;
1006
1007 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c;
1008 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute;
1009};
1010
1011struct xml_node_struct {
1012 xml_node_struct(impl::xml_memory_page* page, xml_node_type type)
1013 : header(page, type), namevalue_base(0) {
1014 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1015 }
1016
1017 impl::compact_header header;
1018
1019 uint16_t namevalue_base;
1020
1021 impl::compact_string<4, 2> name;
1022 impl::compact_string<5, 3> value;
1023
1024 impl::compact_pointer_parent<xml_node_struct, 6> parent;
1025
1026 impl::compact_pointer<xml_node_struct, 8, 0> first_child;
1027
1028 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c;
1029 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling;
1030
1031 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute;
1032};
1033} // namespace pugi
1034#else
1035namespace pugi {
1037 xml_attribute_struct(impl::xml_memory_page* page)
1038 : name(0), value(0), prev_attribute_c(0), next_attribute(0) {
1039 header = PUGI__GETHEADER_IMPL(this, page, 0);
1040 }
1041
1042 uintptr_t header;
1043
1044 char_t* name;
1045 char_t* value;
1046
1047 xml_attribute_struct* prev_attribute_c;
1048 xml_attribute_struct* next_attribute;
1049};
1050
1052 xml_node_struct(impl::xml_memory_page* page, xml_node_type type)
1053 : name(0),
1054 value(0),
1055 parent(0),
1056 first_child(0),
1057 prev_sibling_c(0),
1058 next_sibling(0),
1059 first_attribute(0) {
1060 header = PUGI__GETHEADER_IMPL(this, page, type);
1061 }
1062
1063 uintptr_t header;
1064
1065 char_t* name;
1066 char_t* value;
1067
1068 xml_node_struct* parent;
1069
1070 xml_node_struct* first_child;
1071
1072 xml_node_struct* prev_sibling_c;
1073 xml_node_struct* next_sibling;
1074
1075 xml_attribute_struct* first_attribute;
1076};
1077} // namespace pugi
1078#endif
1079
1080PUGI__NS_BEGIN
1082 char_t* buffer;
1083 xml_extra_buffer* next;
1084};
1085
1086struct xml_document_struct : public xml_node_struct, public xml_allocator {
1088 : xml_node_struct(page, node_document),
1089 xml_allocator(page),
1090 buffer(0),
1091 extra_buffers(0) {}
1092
1093 const char_t* buffer;
1094
1095 xml_extra_buffer* extra_buffers;
1096
1097#ifdef PUGIXML_COMPACT
1098 compact_hash_table hash;
1099#endif
1100};
1101
1102template <typename Object>
1103inline xml_allocator& get_allocator(const Object* object) {
1104 assert(object);
1105
1106 return *PUGI__GETPAGE(object)->allocator;
1107}
1108
1109template <typename Object>
1110inline xml_document_struct& get_document(const Object* object) {
1111 assert(object);
1112
1113 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1114}
1115PUGI__NS_END
1116
1117// Low-level DOM operations
1118PUGI__NS_BEGIN
1119inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) {
1120 xml_memory_page* page;
1121 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1122 if (!memory) return 0;
1123
1124 return new (memory) xml_attribute_struct(page);
1125}
1126
1127inline xml_node_struct* allocate_node(xml_allocator& alloc,
1128 xml_node_type type) {
1129 xml_memory_page* page;
1130 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1131 if (!memory) return 0;
1132
1133 return new (memory) xml_node_struct(page, type);
1134}
1135
1136inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) {
1137 if (a->header & impl::xml_memory_page_name_allocated_mask)
1138 alloc.deallocate_string(a->name);
1139
1140 if (a->header & impl::xml_memory_page_value_allocated_mask)
1141 alloc.deallocate_string(a->value);
1142
1143 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1144}
1145
1146inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) {
1147 if (n->header & impl::xml_memory_page_name_allocated_mask)
1148 alloc.deallocate_string(n->name);
1149
1150 if (n->header & impl::xml_memory_page_value_allocated_mask)
1151 alloc.deallocate_string(n->value);
1152
1153 for (xml_attribute_struct* attr = n->first_attribute; attr;) {
1154 xml_attribute_struct* next = attr->next_attribute;
1155
1156 destroy_attribute(attr, alloc);
1157
1158 attr = next;
1159 }
1160
1161 for (xml_node_struct* child = n->first_child; child;) {
1162 xml_node_struct* next = child->next_sibling;
1163
1164 destroy_node(child, alloc);
1165
1166 child = next;
1167 }
1168
1169 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1170}
1171
1172inline void append_node(xml_node_struct* child, xml_node_struct* node) {
1173 child->parent = node;
1174
1175 xml_node_struct* head = node->first_child;
1176
1177 if (head) {
1178 xml_node_struct* tail = head->prev_sibling_c;
1179
1180 tail->next_sibling = child;
1181 child->prev_sibling_c = tail;
1182 head->prev_sibling_c = child;
1183 } else {
1184 node->first_child = child;
1185 child->prev_sibling_c = child;
1186 }
1187}
1188
1189inline void prepend_node(xml_node_struct* child, xml_node_struct* node) {
1190 child->parent = node;
1191
1192 xml_node_struct* head = node->first_child;
1193
1194 if (head) {
1195 child->prev_sibling_c = head->prev_sibling_c;
1196 head->prev_sibling_c = child;
1197 } else
1198 child->prev_sibling_c = child;
1199
1200 child->next_sibling = head;
1201 node->first_child = child;
1202}
1203
1204inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) {
1205 xml_node_struct* parent = node->parent;
1206
1207 child->parent = parent;
1208
1209 if (node->next_sibling)
1210 node->next_sibling->prev_sibling_c = child;
1211 else
1212 parent->first_child->prev_sibling_c = child;
1213
1214 child->next_sibling = node->next_sibling;
1215 child->prev_sibling_c = node;
1216
1217 node->next_sibling = child;
1218}
1219
1220inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) {
1221 xml_node_struct* parent = node->parent;
1222
1223 child->parent = parent;
1224
1225 if (node->prev_sibling_c->next_sibling)
1226 node->prev_sibling_c->next_sibling = child;
1227 else
1228 parent->first_child = child;
1229
1230 child->prev_sibling_c = node->prev_sibling_c;
1231 child->next_sibling = node;
1232
1233 node->prev_sibling_c = child;
1234}
1235
1236inline void remove_node(xml_node_struct* node) {
1237 xml_node_struct* parent = node->parent;
1238
1239 if (node->next_sibling)
1240 node->next_sibling->prev_sibling_c = node->prev_sibling_c;
1241 else
1242 parent->first_child->prev_sibling_c = node->prev_sibling_c;
1243
1244 if (node->prev_sibling_c->next_sibling)
1245 node->prev_sibling_c->next_sibling = node->next_sibling;
1246 else
1247 parent->first_child = node->next_sibling;
1248
1249 node->parent = 0;
1250 node->prev_sibling_c = 0;
1251 node->next_sibling = 0;
1252}
1253
1254inline void append_attribute(xml_attribute_struct* attr,
1255 xml_node_struct* node) {
1256 xml_attribute_struct* head = node->first_attribute;
1257
1258 if (head) {
1259 xml_attribute_struct* tail = head->prev_attribute_c;
1260
1261 tail->next_attribute = attr;
1262 attr->prev_attribute_c = tail;
1263 head->prev_attribute_c = attr;
1264 } else {
1265 node->first_attribute = attr;
1266 attr->prev_attribute_c = attr;
1267 }
1268}
1269
1270inline void prepend_attribute(xml_attribute_struct* attr,
1271 xml_node_struct* node) {
1272 xml_attribute_struct* head = node->first_attribute;
1273
1274 if (head) {
1275 attr->prev_attribute_c = head->prev_attribute_c;
1276 head->prev_attribute_c = attr;
1277 } else
1278 attr->prev_attribute_c = attr;
1279
1280 attr->next_attribute = head;
1281 node->first_attribute = attr;
1282}
1283
1284inline void insert_attribute_after(xml_attribute_struct* attr,
1285 xml_attribute_struct* place,
1286 xml_node_struct* node) {
1287 if (place->next_attribute)
1288 place->next_attribute->prev_attribute_c = attr;
1289 else
1290 node->first_attribute->prev_attribute_c = attr;
1291
1292 attr->next_attribute = place->next_attribute;
1293 attr->prev_attribute_c = place;
1294 place->next_attribute = attr;
1295}
1296
1297inline void insert_attribute_before(xml_attribute_struct* attr,
1298 xml_attribute_struct* place,
1299 xml_node_struct* node) {
1300 if (place->prev_attribute_c->next_attribute)
1301 place->prev_attribute_c->next_attribute = attr;
1302 else
1303 node->first_attribute = attr;
1304
1305 attr->prev_attribute_c = place->prev_attribute_c;
1306 attr->next_attribute = place;
1307 place->prev_attribute_c = attr;
1308}
1309
1310inline void remove_attribute(xml_attribute_struct* attr,
1311 xml_node_struct* node) {
1312 if (attr->next_attribute)
1313 attr->next_attribute->prev_attribute_c = attr->prev_attribute_c;
1314 else
1315 node->first_attribute->prev_attribute_c = attr->prev_attribute_c;
1316
1317 if (attr->prev_attribute_c->next_attribute)
1318 attr->prev_attribute_c->next_attribute = attr->next_attribute;
1319 else
1320 node->first_attribute = attr->next_attribute;
1321
1322 attr->prev_attribute_c = 0;
1323 attr->next_attribute = 0;
1324}
1325
1326PUGI__FN_NO_INLINE xml_node_struct* append_new_node(
1327 xml_node_struct* node, xml_allocator& alloc,
1328 xml_node_type type = node_element) {
1329 if (!alloc.reserve()) return 0;
1330
1331 xml_node_struct* child = allocate_node(alloc, type);
1332 if (!child) return 0;
1333
1334 append_node(child, node);
1335
1336 return child;
1337}
1338
1339PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(
1340 xml_node_struct* node, xml_allocator& alloc) {
1341 if (!alloc.reserve()) return 0;
1342
1343 xml_attribute_struct* attr = allocate_attribute(alloc);
1344 if (!attr) return 0;
1345
1346 append_attribute(attr, node);
1347
1348 return attr;
1349}
1350PUGI__NS_END
1351
1352// Helper classes for code generation
1353PUGI__NS_BEGIN
// Compile-time boolean tags used as template arguments; `value` is the flag.
struct opt_false {
  enum { value = 0 };
};

struct opt_true {
  enum { value = 1 };
};
1361PUGI__NS_END
1362
1363// Unicode utilities
1364PUGI__NS_BEGIN
// Returns `value` with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value) {
  const unsigned int low_byte = value & 0xff;
  const unsigned int high_byte = value >> 8;

  return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1368
// Returns `value` with its four bytes in reverse order.
inline uint32_t endian_swap(uint32_t value) {
  const uint32_t byte0 = value & 0xff;
  const uint32_t byte1 = (value >> 8) & 0xff;
  const uint32_t byte2 = (value >> 16) & 0xff;
  const uint32_t byte3 = value >> 24;

  return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
1373
// Decoder traits that count UTF-8 code units instead of writing them.
// `low` receives code points below U+10000, `high` the ones above; each
// returns the running total advanced by the encoded length.
struct utf8_counter {
  typedef size_t value_type;

  static value_type low(value_type result, uint32_t ch) {
    // U+0000..U+007F
    if (ch < 0x80) return result + 1;
    // U+0080..U+07FF
    if (ch < 0x800) return result + 2;
    // U+0800..U+FFFF
    return result + 3;
  }

  static value_type high(value_type result, uint32_t) {
    // U+10000..U+10FFFF
    return result + 4;
  }
};
1393
// Decoder traits that encode code points as UTF-8 into a byte buffer.
// Each function writes at `result` and returns the position just past the
// bytes written; the caller must have sized the buffer beforehand.
struct utf8_writer {
  typedef uint8_t* value_type;

  // Encodes a code point below U+10000 (1 to 3 bytes).
  static value_type low(value_type result, uint32_t ch) {
    // U+0000..U+007F
    if (ch < 0x80) {
      *result = static_cast<uint8_t>(ch);
      return result + 1;
    }
    // U+0080..U+07FF
    else if (ch < 0x800) {
      result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
      result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
      return result + 2;
    }
    // U+0800..U+FFFF
    else {
      result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
      result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
      result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
      return result + 3;
    }
  }

  // Encodes a code point in U+10000..U+10FFFF (4 bytes).
  static value_type high(value_type result, uint32_t ch) {
    result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
    result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
    result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
    result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
    return result + 4;
  }

  // Dispatches to low() or high() depending on the code point's range.
  static value_type any(value_type result, uint32_t ch) {
    return (ch < 0x10000) ? low(result, ch) : high(result, ch);
  }
};
1431
// Decoder traits that count UTF-16 code units: one unit for code points
// below U+10000 (`low`), two units (a surrogate pair) otherwise (`high`).
struct utf16_counter {
  typedef size_t value_type;

  static value_type low(value_type result, uint32_t) { return result + 1; }

  static value_type high(value_type result, uint32_t) { return result + 2; }
};
1439
// Decoder traits that encode code points as UTF-16 into a uint16_t buffer.
// Each function writes at `result` and returns the position just past the
// units written.
struct utf16_writer {
  typedef uint16_t* value_type;

  // Stores a code point below U+10000 as a single unit.
  static value_type low(value_type result, uint32_t ch) {
    *result = static_cast<uint16_t>(ch);

    return result + 1;
  }

  // Stores a code point at or above U+10000 as a surrogate pair.
  static value_type high(value_type result, uint32_t ch) {
    uint32_t msh = static_cast<uint32_t>(ch - 0x10000) >> 10;
    uint32_t lsh = static_cast<uint32_t>(ch - 0x10000) & 0x3ff;

    result[0] = static_cast<uint16_t>(0xD800 + msh);
    result[1] = static_cast<uint16_t>(0xDC00 + lsh);

    return result + 2;
  }

  // Dispatches to low() or high() depending on the code point's range.
  static value_type any(value_type result, uint32_t ch) {
    return (ch < 0x10000) ? low(result, ch) : high(result, ch);
  }
};
1463
// Decoder traits that count UTF-32 code units: every code point is one unit.
struct utf32_counter {
  typedef size_t value_type;

  static value_type low(value_type result, uint32_t) { return result + 1; }

  static value_type high(value_type result, uint32_t) { return result + 1; }
};
1471
// Decoder traits that store code points as UTF-32 units. All three entry
// points write `ch` unchanged at `result` and return the next position.
struct utf32_writer {
  typedef uint32_t* value_type;

  static value_type low(value_type result, uint32_t ch) {
    *result = ch;

    return result + 1;
  }

  static value_type high(value_type result, uint32_t ch) {
    *result = ch;

    return result + 1;
  }

  static value_type any(value_type result, uint32_t ch) {
    *result = ch;

    return result + 1;
  }
};
1493
// Decoder traits that store code points as single bytes. Code points above
// 255 cannot be represented and are written as '?'.
struct latin1_writer {
  typedef uint8_t* value_type;

  static value_type low(value_type result, uint32_t ch) {
    *result = static_cast<uint8_t>(ch > 255 ? '?' : ch);

    return result + 1;
  }

  static value_type high(value_type result, uint32_t ch) {
    (void)ch;  // always out of range for a single byte

    *result = '?';

    return result + 1;
  }
};
1511
// Decodes a UTF-8 byte sequence and feeds each code point to `Traits`:
// Traits::low for code points decoded from 1-3 byte sequences, Traits::high
// for 4-byte sequences. Bytes that do not start a well-formed sequence
// (bad lead byte, missing or malformed continuation bytes, truncated input)
// are skipped one at a time. Returns the final Traits accumulator.
struct utf8_decoder {
  typedef uint8_t type;

  template <typename Traits>
  static inline typename Traits::value_type process(
      const uint8_t* data, size_t size, typename Traits::value_type result,
      Traits) {
    const uint8_t utf8_byte_mask = 0x3f;

    while (size) {
      uint8_t lead = *data;

      // 0xxxxxxx -> U+0000..U+007F
      if (lead < 0x80) {
        result = Traits::low(result, lead);
        data += 1;
        size -= 1;

        // process aligned single-byte (ascii) blocks
        if ((reinterpret_cast<uintptr_t>(data) & 3) == 0) {
          // round-trip through void* to silence 'cast increases required
          // alignment of target type' warnings
          while (size >= 4 && (*static_cast<const uint32_t*>(
                                   static_cast<const void*>(data)) &
                               0x80808080) == 0) {
            result = Traits::low(result, data[0]);
            result = Traits::low(result, data[1]);
            result = Traits::low(result, data[2]);
            result = Traits::low(result, data[3]);
            data += 4;
            size -= 4;
          }
        }
      }
      // 110xxxxx -> U+0080..U+07FF
      else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 &&
               (data[1] & 0xc0) == 0x80) {
        result = Traits::low(
            result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
        data += 2;
        size -= 2;
      }
      // 1110xxxx -> U+0800-U+FFFF
      else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 &&
               (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80) {
        result = Traits::low(result, ((lead & ~0xE0) << 12) |
                                         ((data[1] & utf8_byte_mask) << 6) |
                                         (data[2] & utf8_byte_mask));
        data += 3;
        size -= 3;
      }
      // 11110xxx -> U+10000..U+10FFFF
      else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 &&
               (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 &&
               (data[3] & 0xc0) == 0x80) {
        result = Traits::high(result, ((lead & ~0xF0) << 18) |
                                          ((data[1] & utf8_byte_mask) << 12) |
                                          ((data[2] & utf8_byte_mask) << 6) |
                                          (data[3] & utf8_byte_mask));
        data += 4;
        size -= 4;
      }
      // 10xxxxxx or 11111xxx -> invalid
      else {
        data += 1;
        size -= 1;
      }
    }

    return result;
  }
};
1584
1585template <typename opt_swap>
1587 typedef uint16_t type;
1588
1589 template <typename Traits>
1590 static inline typename Traits::value_type process(
1591 const uint16_t* data, size_t size, typename Traits::value_type result,
1592 Traits) {
1593 while (size) {
1594 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1595
1596 // U+0000..U+D7FF
1597 if (lead < 0xD800) {
1598 result = Traits::low(result, lead);
1599 data += 1;
1600 size -= 1;
1601 }
1602 // U+E000..U+FFFF
1603 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) {
1604 result = Traits::low(result, lead);
1605 data += 1;
1606 size -= 1;
1607 }
1608 // surrogate pair lead
1609 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) {
1610 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1611
1612 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) {
1613 result = Traits::high(
1614 result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1615 data += 2;
1616 size -= 2;
1617 } else {
1618 data += 1;
1619 size -= 1;
1620 }
1621 } else {
1622 data += 1;
1623 size -= 1;
1624 }
1625 }
1626
1627 return result;
1628 }
1629};
1630
1631template <typename opt_swap>
1633 typedef uint32_t type;
1634
1635 template <typename Traits>
1636 static inline typename Traits::value_type process(
1637 const uint32_t* data, size_t size, typename Traits::value_type result,
1638 Traits) {
1639 while (size) {
1640 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1641
1642 // U+0000..U+FFFF
1643 if (lead < 0x10000) {
1644 result = Traits::low(result, lead);
1645 data += 1;
1646 size -= 1;
1647 }
1648 // U+10000..U+10FFFF
1649 else {
1650 result = Traits::high(result, lead);
1651 data += 1;
1652 size -= 1;
1653 }
1654 }
1655
1656 return result;
1657 }
1658};
1659
// Walks a byte sequence and passes every byte value to Traits::low as a code
// point. Returns the final Traits accumulator.
struct latin1_decoder {
  typedef uint8_t type;

  template <typename Traits>
  static inline typename Traits::value_type process(
      const uint8_t* data, size_t size, typename Traits::value_type result,
      Traits) {
    while (size) {
      result = Traits::low(result, *data);
      data += 1;
      size -= 1;
    }

    return result;
  }
};
1676
1677template <size_t size>
1679
1680template <>
1682 typedef uint16_t type;
1683 typedef utf16_counter counter;
1684 typedef utf16_writer writer;
1686};
1687
1688template <>
1690 typedef uint32_t type;
1691 typedef utf32_counter counter;
1692 typedef utf32_writer writer;
1694};
1695
1696typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1697typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1698
1700 typedef wchar_t type;
1701
1702 template <typename Traits>
1703 static inline typename Traits::value_type process(
1704 const wchar_t* data, size_t size, typename Traits::value_type result,
1705 Traits traits) {
1706 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1707
1708 return decoder::process(
1709 reinterpret_cast<const typename decoder::type*>(data), size, result,
1710 traits);
1711 }
1712};
1713
1714#ifdef PUGIXML_WCHAR_MODE
1715PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data,
1716 size_t length) {
1717 for (size_t i = 0; i < length; ++i)
1718 result[i] = static_cast<wchar_t>(endian_swap(
1719 static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1720}
1721#endif
1722PUGI__NS_END
1723
1724PUGI__NS_BEGIN
// Bit flags stored per character in chartype_table.
enum chartype_t {
  ct_parse_pcdata = 0x01,   // \0, &, \r, <
  ct_parse_attr = 0x02,     // \0, &, \r, ', "
  ct_parse_attr_ws = 0x04,  // \0, &, \r, ', ", \n, tab
  ct_space = 0x08,          // \r, \n, space, tab
  ct_parse_cdata = 0x10,    // \0, ], >, \r
  ct_parse_comment = 0x20,  // \0, -, >, \r
  ct_symbol = 0x40,         // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
  ct_start_symbol = 0x80    // Any symbol > 127, a-z, A-Z, _, :
};
1735
1736static const unsigned char chartype_table[256] = {
1737 55, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 0, 0, 63,
1738 0, 0, // 0-15
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1740 0, 0, // 16-31
1741 8, 0, 6, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 96,
1742 64, 0, // 32-47
1743 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 192, 0, 1, 0,
1744 48, 0, // 48-63
1745 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1746 192, 192, // 64-79
1747 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 16,
1748 0, 192, // 80-95
1749 0, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1750 192, 192, // 96-111
1751 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0, 0, 0,
1752 0, 0, // 112-127
1753
1754 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1755 192, 192, // 128+
1756 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1757 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1758 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1759 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1760 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1761 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1762 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
1763 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192};
1764
// Bit flags stored per character in chartypex_table.
enum chartypex_t {
  // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
  ctx_special_pcdata = 0x01,
  // Any symbol >= 0 and < 32 (except \t), &, <, >, "
  ctx_special_attr = 0x02,
  // Any symbol > 127, a-z, A-Z, _
  ctx_start_symbol = 0x04,
  // 0-9
  ctx_digit = 0x08,
  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
  ctx_symbol = 0x10
};
1773
1774static const unsigned char chartypex_table[256] = {
1775 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 2, 3, 3, 2, 3, 3, // 0-15
1776 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 16-31
1777 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 16, 16, 0, // 32-47
1778 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 3, 0, 3, 0, // 48-63
1779
1780 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 64-79
1781 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 20, // 80-95
1782 0, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 96-111
1783 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, // 112-127
1784
1785 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, // 128+
1786 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1787 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1788 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1789 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1790 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
1791 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20};
1792
// Character classification: looks `c` up in `table` and masks the entry with
// the flag set `ct`; the result is non-zero when any requested flag is set.
#ifdef PUGIXML_WCHAR_MODE
// wide characters at or above 128 all share the table entry at index 128
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table)                            \
  (table[static_cast<unsigned int>(c) < 128 ? static_cast<unsigned int>(c) \
                                            : 128] &                    \
   (ct))
#else
// narrow characters index the table directly by their unsigned byte value
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) \
  ((table)[static_cast<unsigned char>(c)] & (ct))
#endif

// Lookup against chartype_table (chartype_t flags).
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
// Lookup against chartypex_table (chartypex_t flags).
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1805
1806PUGI__FN bool is_little_endian() {
1807 unsigned int ui = 1;
1808
1809 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1810}
1811
1812PUGI__FN xml_encoding get_wchar_encoding() {
1813 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1814
1815 if (sizeof(wchar_t) == 2)
1816 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1817 else
1818 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1819}
1820
1821PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size,
1822 const uint8_t*& out_encoding,
1823 size_t& out_length) {
1824#define PUGI__SCANCHAR(ch) \
1825 { \
1826 if (offset >= size || data[offset] != ch) return false; \
1827 offset++; \
1828 }
1829#define PUGI__SCANCHARTYPE(ct) \
1830 { \
1831 while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; \
1832 }
1833
1834 // check if we have a non-empty XML declaration
1835 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') &
1836 (data[3] == 'm') & (data[4] == 'l') &&
1837 PUGI__IS_CHARTYPE(data[5], ct_space)))
1838 return false;
1839
1840 // scan XML declaration until the encoding field
1841 for (size_t i = 6; i + 1 < size; ++i) {
1842 // declaration can not contain ? in quoted values
1843 if (data[i] == '?') return false;
1844
1845 if (data[i] == 'e' && data[i + 1] == 'n') {
1846 size_t offset = i;
1847
1848 // encoding follows the version field which can't contain 'en' so this has
1849 // to be the encoding if XML is well formed
1850 PUGI__SCANCHAR('e');
1851 PUGI__SCANCHAR('n');
1852 PUGI__SCANCHAR('c');
1853 PUGI__SCANCHAR('o');
1854 PUGI__SCANCHAR('d');
1855 PUGI__SCANCHAR('i');
1856 PUGI__SCANCHAR('n');
1857 PUGI__SCANCHAR('g');
1858
1859 // S? = S?
1860 PUGI__SCANCHARTYPE(ct_space);
1861 PUGI__SCANCHAR('=');
1862 PUGI__SCANCHARTYPE(ct_space);
1863
1864 // the only two valid delimiters are ' and "
1865 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1866
1867 PUGI__SCANCHAR(delimiter);
1868
1869 size_t start = offset;
1870
1871 out_encoding = data + offset;
1872
1873 PUGI__SCANCHARTYPE(ct_symbol);
1874
1875 out_length = offset - start;
1876
1877 PUGI__SCANCHAR(delimiter);
1878
1879 return true;
1880 }
1881 }
1882
1883 return false;
1884
1885#undef PUGI__SCANCHAR
1886#undef PUGI__SCANCHARTYPE
1887}
1888
1889PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) {
1890 // skip encoding autodetection if input buffer is too small
1891 if (size < 4) return encoding_utf8;
1892
1893 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1894
1895 // look for BOM in first few bytes
1896 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1897 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1898 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
1899 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
1900 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
1901
1902 // look for <, <? or <?xm in various encodings
1903 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
1904 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
1905 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
1906 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
1907
1908 // look for utf16 < followed by node name (this may fail, but is better than
1909 // utf8 since it's zero terminated so early)
1910 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
1911 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
1912
1913 // no known BOM detected; parse declaration
1914 const uint8_t* enc = 0;
1915 size_t enc_length = 0;
1916
1917 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d &&
1918 parse_declaration_encoding(data, size, enc, enc_length)) {
1919 // iso-8859-1 (case-insensitive)
1920 if (enc_length == 10 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' &&
1921 (enc[2] | ' ') == 'o' && enc[3] == '-' && enc[4] == '8' &&
1922 enc[5] == '8' && enc[6] == '5' && enc[7] == '9' && enc[8] == '-' &&
1923 enc[9] == '1')
1924 return encoding_latin1;
1925
1926 // latin1 (case-insensitive)
1927 if (enc_length == 6 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' &&
1928 (enc[2] | ' ') == 't' && (enc[3] | ' ') == 'i' &&
1929 (enc[4] | ' ') == 'n' && enc[5] == '1')
1930 return encoding_latin1;
1931 }
1932
1933 return encoding_utf8;
1934}
1935
1936PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding,
1937 const void* contents, size_t size) {
1938 // replace wchar encoding with utf implementation
1939 if (encoding == encoding_wchar) return get_wchar_encoding();
1940
1941 // replace utf16 encoding with utf16 with specific endianness
1942 if (encoding == encoding_utf16)
1943 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1944
1945 // replace utf32 encoding with utf32 with specific endianness
1946 if (encoding == encoding_utf32)
1947 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1948
1949 // only do autodetection if no explicit encoding is requested
1950 if (encoding != encoding_auto) return encoding;
1951
1952 // try to guess encoding (based on XML specification, Appendix F.1)
1953 const uint8_t* data = static_cast<const uint8_t*>(contents);
1954
1955 return guess_buffer_encoding(data, size);
1956}
1957
1958PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length,
1959 const void* contents, size_t size,
1960 bool is_mutable) {
1961 size_t length = size / sizeof(char_t);
1962
1963 if (is_mutable) {
1964 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
1965 out_length = length;
1966 } else {
1967 char_t* buffer = static_cast<char_t*>(
1968 xml_memory::allocate((length + 1) * sizeof(char_t)));
1969 if (!buffer) return false;
1970
1971 if (contents)
1972 memcpy(buffer, contents, length * sizeof(char_t));
1973 else
1974 assert(length == 0);
1975
1976 buffer[length] = 0;
1977
1978 out_buffer = buffer;
1979 out_length = length + 1;
1980 }
1981
1982 return true;
1983}
1984
1985#ifdef PUGIXML_WCHAR_MODE
1986PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) {
1987 return (le == encoding_utf16_be && re == encoding_utf16_le) ||
1988 (le == encoding_utf16_le && re == encoding_utf16_be) ||
1989 (le == encoding_utf32_be && re == encoding_utf32_le) ||
1990 (le == encoding_utf32_le && re == encoding_utf32_be);
1991}
1992
1993PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer,
1994 size_t& out_length,
1995 const void* contents, size_t size,
1996 bool is_mutable) {
1997 const char_t* data = static_cast<const char_t*>(contents);
1998 size_t length = size / sizeof(char_t);
1999
2000 if (is_mutable) {
2001 char_t* buffer = const_cast<char_t*>(data);
2002
2003 convert_wchar_endian_swap(buffer, data, length);
2004
2005 out_buffer = buffer;
2006 out_length = length;
2007 } else {
2008 char_t* buffer = static_cast<char_t*>(
2009 xml_memory::allocate((length + 1) * sizeof(char_t)));
2010 if (!buffer) return false;
2011
2012 convert_wchar_endian_swap(buffer, data, length);
2013 buffer[length] = 0;
2014
2015 out_buffer = buffer;
2016 out_length = length + 1;
2017 }
2018
2019 return true;
2020}
2021
2022template <typename D>
2023PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length,
2024 const void* contents, size_t size, D) {
2025 const typename D::type* data = static_cast<const typename D::type*>(contents);
2026 size_t data_length = size / sizeof(typename D::type);
2027
2028 // first pass: get length in wchar_t units
2029 size_t length = D::process(data, data_length, 0, wchar_counter());
2030
2031 // allocate buffer of suitable length
2032 char_t* buffer =
2033 static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2034 if (!buffer) return false;
2035
2036 // second pass: convert utf16 input to wchar_t
2037 wchar_writer::value_type obegin =
2038 reinterpret_cast<wchar_writer::value_type>(buffer);
2039 wchar_writer::value_type oend =
2040 D::process(data, data_length, obegin, wchar_writer());
2041
2042 assert(oend == obegin + length);
2043 *oend = 0;
2044
2045 out_buffer = buffer;
2046 out_length = length + 1;
2047
2048 return true;
2049}
2050
2051PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length,
2052 xml_encoding encoding, const void* contents,
2053 size_t size, bool is_mutable) {
2054 // get native encoding
2055 xml_encoding wchar_encoding = get_wchar_encoding();
2056
2057 // fast path: no conversion required
2058 if (encoding == wchar_encoding)
2059 return get_mutable_buffer(out_buffer, out_length, contents, size,
2060 is_mutable);
2061
2062 // only endian-swapping is required
2063 if (need_endian_swap_utf(encoding, wchar_encoding))
2064 return convert_buffer_endian_swap(out_buffer, out_length, contents, size,
2065 is_mutable);
2066
2067 // source encoding is utf8
2068 if (encoding == encoding_utf8)
2069 return convert_buffer_generic(out_buffer, out_length, contents, size,
2070 utf8_decoder());
2071
2072 // source encoding is utf16
2073 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
2074 xml_encoding native_encoding =
2075 is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2076
2077 return (native_encoding == encoding)
2078 ? convert_buffer_generic(out_buffer, out_length, contents, size,
2080 : convert_buffer_generic(out_buffer, out_length, contents, size,
2082 }
2083
2084 // source encoding is utf32
2085 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
2086 xml_encoding native_encoding =
2087 is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2088
2089 return (native_encoding == encoding)
2090 ? convert_buffer_generic(out_buffer, out_length, contents, size,
2092 : convert_buffer_generic(out_buffer, out_length, contents, size,
2094 }
2095
2096 // source encoding is latin1
2097 if (encoding == encoding_latin1)
2098 return convert_buffer_generic(out_buffer, out_length, contents, size,
2099 latin1_decoder());
2100
2101 assert(false && "Invalid encoding");
2102 return false;
2103}
2104#else
2105template <typename D>
2106PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length,
2107 const void* contents, size_t size, D) {
2108 const typename D::type* data = static_cast<const typename D::type*>(contents);
2109 size_t data_length = size / sizeof(typename D::type);
2110
2111 // first pass: get length in utf8 units
2112 size_t length = D::process(data, data_length, 0, utf8_counter());
2113
2114 // allocate buffer of suitable length
2115 char_t* buffer =
2116 static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2117 if (!buffer) return false;
2118
2119 // second pass: convert utf16 input to utf8
2120 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2121 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2122
2123 assert(oend == obegin + length);
2124 *oend = 0;
2125
2126 out_buffer = buffer;
2127 out_length = length + 1;
2128
2129 return true;
2130}
2131
2132PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data,
2133 size_t size) {
2134 for (size_t i = 0; i < size; ++i)
2135 if (data[i] > 127) return i;
2136
2137 return size;
2138}
2139
2140PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length,
2141 const void* contents, size_t size,
2142 bool is_mutable) {
2143 const uint8_t* data = static_cast<const uint8_t*>(contents);
2144 size_t data_length = size;
2145
2146 // get size of prefix that does not need utf8 conversion
2147 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2148 assert(prefix_length <= data_length);
2149
2150 const uint8_t* postfix = data + prefix_length;
2151 size_t postfix_length = data_length - prefix_length;
2152
2153 // if no conversion is needed, just return the original buffer
2154 if (postfix_length == 0)
2155 return get_mutable_buffer(out_buffer, out_length, contents, size,
2156 is_mutable);
2157
2158 // first pass: get length in utf8 units
2159 size_t length =
2160 prefix_length +
2161 latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2162
2163 // allocate buffer of suitable length
2164 char_t* buffer =
2165 static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2166 if (!buffer) return false;
2167
2168 // second pass: convert latin1 input to utf8
2169 memcpy(buffer, data, prefix_length);
2170
2171 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2172 uint8_t* oend = latin1_decoder::process(
2173 postfix, postfix_length, obegin + prefix_length, utf8_writer());
2174
2175 assert(oend == obegin + length);
2176 *oend = 0;
2177
2178 out_buffer = buffer;
2179 out_length = length + 1;
2180
2181 return true;
2182}
2183
2184PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length,
2185 xml_encoding encoding, const void* contents,
2186 size_t size, bool is_mutable) {
2187 // fast path: no conversion required
2188 if (encoding == encoding_utf8)
2189 return get_mutable_buffer(out_buffer, out_length, contents, size,
2190 is_mutable);
2191
2192 // source encoding is utf16
2193 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
2194 xml_encoding native_encoding =
2195 is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2196
2197 return (native_encoding == encoding)
2198 ? convert_buffer_generic(out_buffer, out_length, contents, size,
2200 : convert_buffer_generic(out_buffer, out_length, contents, size,
2202 }
2203
2204 // source encoding is utf32
2205 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
2206 xml_encoding native_encoding =
2207 is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2208
2209 return (native_encoding == encoding)
2210 ? convert_buffer_generic(out_buffer, out_length, contents, size,
2212 : convert_buffer_generic(out_buffer, out_length, contents, size,
2214 }
2215
2216 // source encoding is latin1
2217 if (encoding == encoding_latin1)
2218 return convert_buffer_latin1(out_buffer, out_length, contents, size,
2219 is_mutable);
2220
2221 assert(false && "Invalid encoding");
2222 return false;
2223}
2224#endif
2225
2226PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) {
2227 // get length in utf8 characters
2228 return wchar_decoder::process(str, length, 0, utf8_counter());
2229}
2230
2231PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str,
2232 size_t length) {
2233 // convert to utf8
2234 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2235 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2236
2237 assert(begin + size == end);
2238 (void)!end;
2239 (void)!size;
2240}
2241
2242#ifndef PUGIXML_NO_STL
2243PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) {
2244 // first pass: get length in utf8 characters
2245 size_t size = as_utf8_begin(str, length);
2246
2247 // allocate resulting string
2248 std::string result;
2249 result.resize(size);
2250
2251 // second pass: convert to utf8
2252 if (size > 0) as_utf8_end(&result[0], size, str, length);
2253
2254 return result;
2255}
2256
2257PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) {
2258 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2259
2260 // first pass: get length in wchar_t units
2261 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2262
2263 // allocate resulting string
2264 std::basic_string<wchar_t> result;
2265 result.resize(length);
2266
2267 // second pass: convert to wchar_t
2268 if (length > 0) {
2269 wchar_writer::value_type begin =
2270 reinterpret_cast<wchar_writer::value_type>(&result[0]);
2271 wchar_writer::value_type end =
2272 utf8_decoder::process(data, size, begin, wchar_writer());
2273
2274 assert(begin + length == end);
2275 (void)!end;
2276 }
2277
2278 return result;
2279}
2280#endif
2281
2282template <typename Header>
2283inline bool strcpy_insitu_allow(size_t length, const Header& header,
2284 uintptr_t header_mask, char_t* target) {
2285 // never reuse shared memory
2286 if (header & xml_memory_page_contents_shared_mask) return false;
2287
2288 size_t target_length = strlength(target);
2289
2290 // always reuse document buffer memory if possible
2291 if ((header & header_mask) == 0) return target_length >= length;
2292
2293 // reuse heap memory if waste is not too great
2294 const size_t reuse_threshold = 32;
2295
2296 return target_length >= length &&
2297 (target_length < reuse_threshold ||
2298 target_length - length < target_length / 2);
2299}
2300
2301template <typename String, typename Header>
2302PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask,
2303 const char_t* source, size_t source_length) {
2304 if (source_length == 0) {
2305 // empty string and null pointer are equivalent, so just deallocate old
2306 // memory
2307 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2308
2309 if (header & header_mask) alloc->deallocate_string(dest);
2310
2311 // mark the string as not allocated
2312 dest = 0;
2313 header &= ~header_mask;
2314
2315 return true;
2316 } else if (dest &&
2317 strcpy_insitu_allow(source_length, header, header_mask, dest)) {
2318 // we can reuse old buffer, so just copy the new data (including zero
2319 // terminator)
2320 memcpy(dest, source, source_length * sizeof(char_t));
2321 dest[source_length] = 0;
2322
2323 return true;
2324 } else {
2325 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2326
2327 if (!alloc->reserve()) return false;
2328
2329 // allocate new buffer
2330 char_t* buf = alloc->allocate_string(source_length + 1);
2331 if (!buf) return false;
2332
2333 // copy the string (including zero terminator)
2334 memcpy(buf, source, source_length * sizeof(char_t));
2335 buf[source_length] = 0;
2336
2337 // deallocate old buffer (*after* the above to protect against overlapping
2338 // memory and/or allocation failures)
2339 if (header & header_mask) alloc->deallocate_string(dest);
2340
2341 // the string is now allocated, so set the flag
2342 dest = buf;
2343 header |= header_mask;
2344
2345 return true;
2346 }
2347}
2348
2349struct gap {
2350 char_t* end;
2351 size_t size;
2352
2353 gap() : end(0), size(0) {}
2354
2355 // Push new gap, move s count bytes further (skipping the gap).
2356 // Collapse previous gap.
2357 void push(char_t*& s, size_t count) {
2358 if (end) // there was a gap already; collapse it
2359 {
2360 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2361 assert(s >= end);
2362 memmove(end - size, end,
2363 reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2364 }
2365
2366 s += count; // end of current gap
2367
2368 // "merge" two gaps
2369 end = s;
2370 size += count;
2371 }
2372
2373 // Collapse all gaps, return past-the-end pointer
2374 char_t* flush(char_t* s) {
2375 if (end) {
2376 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2377 assert(s >= end);
2378 memmove(end - size, end,
2379 reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2380
2381 return s - size;
2382 } else
2383 return s;
2384 }
2385};
2386
// Decodes one character or entity reference in place.
//
// s points at the '&' that starts the reference (callers check *s == '&'
// before calling). When the text is a complete reference of one of the forms
// &#NNN;  &#xHHH;  &amp;  &apos;  &gt;  &lt;  &quot;
// the decoded character is written at s, the now unused remainder of the
// reference is registered with g as a gap, and the pointer just past the ';'
// is returned.
//
// When the text does not match, nothing is modified and the position at which
// matching stopped is returned, so the caller continues scanning from there
// and the original text is kept verbatim.
PUGI__FN char_t* strconv_escape(char_t* s, gap& g) {
  // stre is the read cursor; s stays at the '&' until something is written
  char_t* stre = s + 1;

  switch (*stre) {
    case '#':  // &#...
    {
      // code point accumulated from the digits
      unsigned int ucsc = 0;

      if (stre[1] == 'x')  // &#x... (hex code)
      {
        stre += 2;

        char_t ch = *stre;

        // "&#x;" carries no digits: leave it alone
        if (ch == ';') return stre;

        for (;;) {
          if (static_cast<unsigned int>(ch - '0') <= 9)
            ucsc = 16 * ucsc + (ch - '0');
          // (ch | ' ') folds 'A'-'F' onto 'a'-'f'
          else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
            ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
          else if (ch == ';')
            break;
          else  // cancel
            return stre;

          ch = *++stre;
        }

        // step over ';'
        ++stre;
      } else  // &#... (dec code)
      {
        char_t ch = *++stre;

        // "&#;" carries no digits: leave it alone
        if (ch == ';') return stre;

        for (;;) {
          if (static_cast<unsigned int>(static_cast<unsigned int>(ch) - '0') <=
              9)
            ucsc = 10 * ucsc + (ch - '0');
          else if (ch == ';')
            break;
          else  // cancel
            return stre;

          ch = *++stre;
        }

        // step over ';'
        ++stre;
      }

      // write the code point at s in the native encoding; s ends up just past
      // the written units
#ifdef PUGIXML_WCHAR_MODE
      s = reinterpret_cast<char_t*>(wchar_writer::any(
          reinterpret_cast<wchar_writer::value_type>(s), ucsc));
#else
      s = reinterpret_cast<char_t*>(
          utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
#endif

      // everything between the written character and the end of the reference
      // is dead text
      g.push(s, stre - s);
      return stre;
    }

    case 'a':  // &a
    {
      ++stre;

      if (*stre == 'm')  // &am
      {
        if (*++stre == 'p' && *++stre == ';')  // &amp;
        {
          *s++ = '&';
          ++stre;

          g.push(s, stre - s);
          return stre;
        }
      } else if (*stre == 'p')  // &ap
      {
        if (*++stre == 'o' && *++stre == 's' && *++stre == ';')  // &apos;
        {
          *s++ = '\'';
          ++stre;

          g.push(s, stre - s);
          return stre;
        }
      }
      break;
    }

    case 'g':  // &g
    {
      if (*++stre == 't' && *++stre == ';')  // &gt;
      {
        *s++ = '>';
        ++stre;

        g.push(s, stre - s);
        return stre;
      }
      break;
    }

    case 'l':  // &l
    {
      if (*++stre == 't' && *++stre == ';')  // &lt;
      {
        *s++ = '<';
        ++stre;

        g.push(s, stre - s);
        return stre;
      }
      break;
    }

    case 'q':  // &q
    {
      if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' &&
          *++stre == ';')  // &quot;
      {
        *s++ = '"';
        ++stre;

        g.push(s, stre - s);
        return stre;
      }
      break;
    }

    default:
      break;
  }

  // unrecognized reference: resume scanning where matching stopped
  return stre;
}
2524
// Parser utilities
//
// These macros are written against the local names used by the parsing
// functions: s (read cursor), endch (the character that the buffer's zero
// terminator replaced), optmsk (parse option bits), cursor (current node),
// alloc, ch, error_offset and error_status.

// True if c is e, or if c is the zero terminator and the character it
// replaced was e.
#define PUGI__ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))
// Advances s over characters classified as ct_space.
#define PUGI__SKIPWS()                             \
  {                                                \
    while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s;   \
  }
// Non-zero if any of the option bits OPT is set in optmsk.
#define PUGI__OPTSET(OPT) (optmsk & (OPT))
// Appends a new node of the given type under cursor and makes it the cursor;
// leaves the enclosing function with status_out_of_memory if that fails.
#define PUGI__PUSHNODE(TYPE)                                   \
  {                                                            \
    cursor = append_new_node(cursor, *alloc, TYPE);            \
    if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s);   \
  }
// Moves cursor back to its parent node.
#define PUGI__POPNODE() \
  { cursor = cursor->parent; }
// Advances s until X holds or the zero terminator is reached.
#define PUGI__SCANFOR(X)             \
  {                                  \
    while (*s != 0 && !(X)) ++s;     \
  }
// Advances s for as long as X holds.
#define PUGI__SCANWHILE(X) \
  {                        \
    while (X) ++s;         \
  }
// Same result as PUGI__SCANWHILE, but X has to be expressed in terms of ss,
// a local holding the character under test; four characters are examined per
// loop iteration and s is left on the first character for which X is false.
#define PUGI__SCANWHILE_UNROLL(X) \
  {                               \
    for (;;) {                    \
      char_t ss = s[0];           \
      if (PUGI__UNLIKELY(!(X))) { \
        break;                    \
      }                           \
      ss = s[1];                  \
      if (PUGI__UNLIKELY(!(X))) { \
        s += 1;                   \
        break;                    \
      }                           \
      ss = s[2];                  \
      if (PUGI__UNLIKELY(!(X))) { \
        s += 2;                   \
        break;                    \
      }                           \
      ss = s[3];                  \
      if (PUGI__UNLIKELY(!(X))) { \
        s += 3;                   \
        break;                    \
      }                           \
      s += 4;                     \
    }                             \
  }
// Ends the current text segment: remembers the character at s in ch,
// overwrites it with a zero terminator and steps past it.
#define PUGI__ENDSEG() \
  {                    \
    ch = *s;           \
    *s = 0;            \
    ++s;               \
  }
// Records the error position m and status err in the parser and returns a
// null char_t* from the enclosing function.
#define PUGI__THROW_ERROR(err, m) \
  return error_offset = m, error_status = err, static_cast<char_t*>(0)
// Raises the given error if s has reached the zero terminator.
#define PUGI__CHECK_ERROR(err, m)             \
  {                                           \
    if (*s == 0) PUGI__THROW_ERROR(err, m);   \
  }
2584
2585PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) {
2586 gap g;
2587
2588 while (true) {
2589 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2590
2591 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2592 {
2593 *s++ = '\n'; // replace first one with 0x0a
2594
2595 if (*s == '\n') g.push(s, 1);
2596 } else if (s[0] == '-' && s[1] == '-' &&
2597 PUGI__ENDSWITH(s[2], '>')) // comment ends here
2598 {
2599 *g.flush(s) = 0;
2600
2601 return s + (s[2] == '>' ? 3 : 2);
2602 } else if (*s == 0) {
2603 return 0;
2604 } else
2605 ++s;
2606 }
2607}
2608
2609PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) {
2610 gap g;
2611
2612 while (true) {
2613 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2614
2615 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2616 {
2617 *s++ = '\n'; // replace first one with 0x0a
2618
2619 if (*s == '\n') g.push(s, 1);
2620 } else if (s[0] == ']' && s[1] == ']' &&
2621 PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2622 {
2623 *g.flush(s) = 0;
2624
2625 return s + 1;
2626 } else if (*s == 0) {
2627 return 0;
2628 } else
2629 ++s;
2630 }
2631}
2632
2633typedef char_t* (*strconv_pcdata_t)(char_t*);
2634
2635template <typename opt_trim, typename opt_eol, typename opt_escape>
2637 static char_t* parse(char_t* s) {
2638 gap g;
2639
2640 char_t* begin = s;
2641
2642 while (true) {
2643 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2644
2645 if (*s == '<') // PCDATA ends here
2646 {
2647 char_t* end = g.flush(s);
2648
2649 if (opt_trim::value)
2650 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) --end;
2651
2652 *end = 0;
2653
2654 return s + 1;
2655 } else if (opt_eol::value &&
2656 *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2657 {
2658 *s++ = '\n'; // replace first one with 0x0a
2659
2660 if (*s == '\n') g.push(s, 1);
2661 } else if (opt_escape::value && *s == '&') {
2662 s = strconv_escape(s, g);
2663 } else if (*s == 0) {
2664 char_t* end = g.flush(s);
2665
2666 if (opt_trim::value)
2667 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) --end;
2668
2669 *end = 0;
2670
2671 return s;
2672 } else
2673 ++s;
2674 }
2675 }
2676};
2677
2678PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) {
2679 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 &&
2680 parse_trim_pcdata == 0x0800);
2681
2682 switch (((optmask >> 4) & 3) |
2683 ((optmask >> 9) & 4)) // get bitmask for flags (eol escapes trim)
2684 {
2685 case 0:
2687 case 1:
2689 case 2:
2691 case 3:
2693 case 4:
2695 case 5:
2697 case 6:
2699 case 7:
2701 default:
2702 assert(false);
2703 return 0; // should not get here
2704 }
2705}
2706
2707typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2708
2709template <typename opt_escape>
2711 static char_t* parse_wnorm(char_t* s, char_t end_quote) {
2712 gap g;
2713
2714 // trim leading whitespaces
2715 if (PUGI__IS_CHARTYPE(*s, ct_space)) {
2716 char_t* str = s;
2717
2718 do ++str;
2719 while (PUGI__IS_CHARTYPE(*str, ct_space));
2720
2721 g.push(s, str - s);
2722 }
2723
2724 while (true) {
2725 PUGI__SCANWHILE_UNROLL(
2726 !PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2727
2728 if (*s == end_quote) {
2729 char_t* str = g.flush(s);
2730
2731 do *str-- = 0;
2732 while (PUGI__IS_CHARTYPE(*str, ct_space));
2733
2734 return s + 1;
2735 } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
2736 *s++ = ' ';
2737
2738 if (PUGI__IS_CHARTYPE(*s, ct_space)) {
2739 char_t* str = s + 1;
2740 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2741
2742 g.push(s, str - s);
2743 }
2744 } else if (opt_escape::value && *s == '&') {
2745 s = strconv_escape(s, g);
2746 } else if (!*s) {
2747 return 0;
2748 } else
2749 ++s;
2750 }
2751 }
2752
2753 static char_t* parse_wconv(char_t* s, char_t end_quote) {
2754 gap g;
2755
2756 while (true) {
2757 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2758
2759 if (*s == end_quote) {
2760 *g.flush(s) = 0;
2761
2762 return s + 1;
2763 } else if (PUGI__IS_CHARTYPE(*s, ct_space)) {
2764 if (*s == '\r') {
2765 *s++ = ' ';
2766
2767 if (*s == '\n') g.push(s, 1);
2768 } else
2769 *s++ = ' ';
2770 } else if (opt_escape::value && *s == '&') {
2771 s = strconv_escape(s, g);
2772 } else if (!*s) {
2773 return 0;
2774 } else
2775 ++s;
2776 }
2777 }
2778
2779 static char_t* parse_eol(char_t* s, char_t end_quote) {
2780 gap g;
2781
2782 while (true) {
2783 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2784
2785 if (*s == end_quote) {
2786 *g.flush(s) = 0;
2787
2788 return s + 1;
2789 } else if (*s == '\r') {
2790 *s++ = '\n';
2791
2792 if (*s == '\n') g.push(s, 1);
2793 } else if (opt_escape::value && *s == '&') {
2794 s = strconv_escape(s, g);
2795 } else if (!*s) {
2796 return 0;
2797 } else
2798 ++s;
2799 }
2800 }
2801
2802 static char_t* parse_simple(char_t* s, char_t end_quote) {
2803 gap g;
2804
2805 while (true) {
2806 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2807
2808 if (*s == end_quote) {
2809 *g.flush(s) = 0;
2810
2811 return s + 1;
2812 } else if (opt_escape::value && *s == '&') {
2813 s = strconv_escape(s, g);
2814 } else if (!*s) {
2815 return 0;
2816 } else
2817 ++s;
2818 }
2819 }
2820};
2821
2822PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) {
2823 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 &&
2824 parse_wconv_attribute == 0x40 &&
2825 parse_wnorm_attribute == 0x80);
2826
2827 switch ((optmask >> 4) &
2828 15) // get bitmask for flags (wconv wnorm eol escapes)
2829 {
2830 case 0:
2832 case 1:
2834 case 2:
2836 case 3:
2838 case 4:
2840 case 5:
2842 case 6:
2844 case 7:
2846 case 8:
2848 case 9:
2850 case 10:
2852 case 11:
2854 case 12:
2856 case 13:
2858 case 14:
2860 case 15:
2862 default:
2863 assert(false);
2864 return 0; // should not get here
2865 }
2866}
2867
2868inline xml_parse_result make_parse_result(xml_parse_status status,
2869 ptrdiff_t offset = 0) {
2870 xml_parse_result result;
2871 result.status = status;
2872 result.offset = offset;
2873
2874 return result;
2875}
2876
  // NOTE(review): these are the data members and constructor of xml_parser;
  // the line that opens the type is not present in this listing - confirm
  // against the original file.

  // allocator used for every node and attribute created while parsing
  xml_allocator* alloc;
  // position of the first error in the buffer; null while no error occurred
  char_t* error_offset;
  // status of the parse; stays status_ok until PUGI__THROW_ERROR records one
  xml_parse_status error_status;

  // Creates a parser in the "no error" state that allocates from alloc_.
  xml_parser(xml_allocator* alloc_)
      : alloc(alloc_), error_offset(0), error_status(status_ok) {}
2884
2885 // DOCTYPE consists of nested sections of the following possible types:
2886 // <!-- ... -->, <? ... ?>, "...", '...'
2887 // <![...]]>
2888 // <!...>
2889 // First group can not contain nested groups
2890 // Second group can contain nested groups of the same type
2891 // Third group can contain all other groups
2892 char_t* parse_doctype_primitive(char_t* s) {
2893 if (*s == '"' || *s == '\'') {
2894 // quoted string
2895 char_t ch = *s++;
2896 PUGI__SCANFOR(*s == ch);
2897 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2898
2899 s++;
2900 } else if (s[0] == '<' && s[1] == '?') {
2901 // <? ... ?>
2902 s += 2;
2903 PUGI__SCANFOR(s[0] == '?' &&
2904 s[1] == '>'); // no need for ENDSWITH because ?> can't
2905 // terminate proper doctype
2906 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2907
2908 s += 2;
2909 } else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') {
2910 s += 4;
2911 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' &&
2912 s[2] == '>'); // no need for ENDSWITH because --> can't
2913 // terminate proper doctype
2914 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2915
2916 s += 3;
2917 } else
2918 PUGI__THROW_ERROR(status_bad_doctype, s);
2919
2920 return s;
2921 }
2922
2923 char_t* parse_doctype_ignore(char_t* s) {
2924 size_t depth = 0;
2925
2926 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2927 s += 3;
2928
2929 while (*s) {
2930 if (s[0] == '<' && s[1] == '!' && s[2] == '[') {
2931 // nested ignore section
2932 s += 3;
2933 depth++;
2934 } else if (s[0] == ']' && s[1] == ']' && s[2] == '>') {
2935 // ignore section end
2936 s += 3;
2937
2938 if (depth == 0) return s;
2939
2940 depth--;
2941 } else
2942 s++;
2943 }
2944
2945 PUGI__THROW_ERROR(status_bad_doctype, s);
2946 }
2947
2948 char_t* parse_doctype_group(char_t* s, char_t endch) {
2949 size_t depth = 0;
2950
2951 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
2952 s += 2;
2953
2954 while (*s) {
2955 if (s[0] == '<' && s[1] == '!' && s[2] != '-') {
2956 if (s[2] == '[') {
2957 // ignore
2958 s = parse_doctype_ignore(s);
2959 if (!s) return s;
2960 } else {
2961 // some control group
2962 s += 2;
2963 depth++;
2964 }
2965 } else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') {
2966 // unknown tag (forbidden), or some primitive group
2967 s = parse_doctype_primitive(s);
2968 if (!s) return s;
2969 } else if (*s == '>') {
2970 if (depth == 0) return s;
2971
2972 depth--;
2973 s++;
2974 } else
2975 s++;
2976 }
2977
2978 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
2979
2980 return s;
2981 }
2982
  // Parses a construct that begins with "<!": a comment, a CDATA section or a
  // DOCTYPE declaration.
  //
  //   s       points at the '!' on entry
  //   cursor  node that receives any newly created child; taken by value, so
  //           the PUGI__PUSHNODE calls below only change this local copy
  //   optmsk  parse options; decide which constructs produce nodes and
  //           whether line endings are normalized
  //   endch   character that the buffer's zero terminator replaced
  //
  // Returns the position after the construct, or null after recording
  // error_status / error_offset via PUGI__THROW_ERROR.
  char_t* parse_exclamation(char_t* s, xml_node_struct* cursor,
                            unsigned int optmsk, char_t endch) {
    // parse node contents, starting with exclamation mark
    ++s;

    if (*s == '-')  // '<!-...'
    {
      ++s;

      if (*s == '-')  // '<!--...'
      {
        ++s;

        if (PUGI__OPTSET(parse_comments)) {
          PUGI__PUSHNODE(node_comment);  // Append a new node on the tree.
          cursor->value = s;             // Save the offset.
        }

        if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) {
          // the comment is kept and line endings have to be normalized
          s = strconv_comment(s, endch);

          if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
        } else {
          // Scan for terminating '-->'.
          PUGI__SCANFOR(s[0] == '-' && s[1] == '-' &&
                        PUGI__ENDSWITH(s[2], '>'));
          PUGI__CHECK_ERROR(status_bad_comment, s);

          if (PUGI__OPTSET(parse_comments))
            *s =
                0;  // Zero-terminate this segment at the first terminating '-'.

          s += (s[2] == '>' ? 3 : 2);  // Step over the '\0->'.
        }
      } else
        PUGI__THROW_ERROR(status_bad_comment, s);
    } else if (*s == '[') {
      // '<![CDATA[...'
      if (*++s == 'C' && *++s == 'D' && *++s == 'A' && *++s == 'T' &&
          *++s == 'A' && *++s == '[') {
        ++s;

        if (PUGI__OPTSET(parse_cdata)) {
          PUGI__PUSHNODE(node_cdata);  // Append a new node on the tree.
          cursor->value = s;           // Save the offset.

          if (PUGI__OPTSET(parse_eol)) {
            s = strconv_cdata(s, endch);

            if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
          } else {
            // Scan for terminating ']]>'.
            PUGI__SCANFOR(s[0] == ']' && s[1] == ']' &&
                          PUGI__ENDSWITH(s[2], '>'));
            PUGI__CHECK_ERROR(status_bad_cdata, s);

            *s++ = 0;  // Zero-terminate this segment.
          }
        } else  // Flagged for discard, but we still have to scan for the
                // terminator.
        {
          // Scan for terminating ']]>'.
          PUGI__SCANFOR(s[0] == ']' && s[1] == ']' &&
                        PUGI__ENDSWITH(s[2], '>'));
          PUGI__CHECK_ERROR(status_bad_cdata, s);

          ++s;
        }

        // every path above leaves s on the second ']'
        s += (s[1] == '>' ? 2 : 1);  // Step over the last ']>'.
      } else
        PUGI__THROW_ERROR(status_bad_cdata, s);
    } else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' &&
               s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) {
      // step back onto the '<' of "<!DOCTYPE"
      s -= 2;

      // a DOCTYPE is only accepted directly under the root
      if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

      // first character after "<!DOCTYPE"
      char_t* mark = s + 9;

      s = parse_doctype_group(s, endch);
      if (!s) return s;

      assert((*s == 0 && endch == '>') || *s == '>');
      // terminate the doctype text at the closing '>' and step over it
      if (*s) *s++ = 0;

      if (PUGI__OPTSET(parse_doctype)) {
        while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

        PUGI__PUSHNODE(node_doctype);

        cursor->value = mark;
      }
    } else if (*s == 0 && endch == '-')
      // buffer ended right after "<!" and the replaced character was '-'
      PUGI__THROW_ERROR(status_bad_comment, s);
    else if (*s == 0 && endch == '[')
      // buffer ended right after "<!" and the replaced character was '['
      PUGI__THROW_ERROR(status_bad_cdata, s);
    else
      PUGI__THROW_ERROR(status_unrecognized_tag, s);

    return s;
  }
3085
  // Parses a construct that begins with "<?": a processing instruction or an
  // XML declaration.
  //
  //   s           points at the '?' on entry
  //   ref_cursor  current node; updated on exit. For a declaration that has
  //               content it is left pointing at the new node_declaration
  //               node so that the caller goes on to parse its attributes.
  //   optmsk      parse options (parse_pi / parse_declaration decide whether
  //               a node is created)
  //   endch       character that the buffer's zero terminator replaced
  //
  // Returns the position at which the caller continues, or null after
  // recording error_status / error_offset via PUGI__THROW_ERROR.
  char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor,
                         unsigned int optmsk, char_t endch) {
    // load into registers
    xml_node_struct* cursor = ref_cursor;
    char_t ch = 0;

    // parse node contents, starting with question mark
    ++s;

    // read PI target
    char_t* target = s;

    if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol))
      PUGI__THROW_ERROR(status_bad_pi, s);

    PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
    PUGI__CHECK_ERROR(status_bad_pi, s);

    // determine node type; stricmp / strcasecmp is not portable
    // (a target of exactly three characters spelling "xml" in any case)
    bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' &&
                       (target[2] | ' ') == 'l' && target + 3 == s;

    if (declaration ? PUGI__OPTSET(parse_declaration)
                    : PUGI__OPTSET(parse_pi)) {
      if (declaration) {
        // disallow non top-level declarations
        if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

        PUGI__PUSHNODE(node_declaration);
      } else {
        PUGI__PUSHNODE(node_pi);
      }

      cursor->name = target;

      // terminate the target name; ch is the character that followed it
      PUGI__ENDSEG();

      // parse value/attributes
      if (ch == '?') {
        // empty node
        if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
        s += (*s == '>');

        PUGI__POPNODE();
      } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
        PUGI__SKIPWS();

        // scan for tag end
        char_t* value = s;

        PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
        PUGI__CHECK_ERROR(status_bad_pi, s);

        if (declaration) {
          // replace ending ? with / so that 'element' terminates properly
          *s = '/';

          // we exit from this function with cursor at node_declaration, which
          // is a signal to parse() to go to LOC_ATTRIBUTES
          s = value;
        } else {
          // store value and step over >
          cursor->value = value;

          PUGI__POPNODE();

          PUGI__ENDSEG();

          s += (*s == '>');
        }
      } else
        PUGI__THROW_ERROR(status_bad_pi, s);
    } else {
      // node is not wanted: skip it without creating anything
      // scan for tag end
      PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
      PUGI__CHECK_ERROR(status_bad_pi, s);

      s += (s[1] == '>' ? 2 : 1);
    }

    // store from registers
    ref_cursor = cursor;

    return s;
  }
3171
  // Main parsing loop: walks the zero-terminated buffer starting at s and
  // builds the node tree under root.
  //
  //   s       start of the text to parse
  //   root    node that receives the top-level nodes
  //   optmsk  parse options; also select the attribute and PCDATA converters
  //   endch   character that the buffer's zero terminator replaced, used to
  //           accept constructs whose final character was that one
  //
  // Names and values stored in nodes and attributes point into the buffer,
  // which is modified in place (segments are zero-terminated as they are
  // recognized).
  //
  // Returns the position at which parsing stopped, or null after recording
  // error_status / error_offset via PUGI__THROW_ERROR.
  char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk,
                     char_t endch) {
    // pick the converters once, up front
    strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
    strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);

    char_t ch = 0;
    xml_node_struct* cursor = root;
    char_t* mark = s;

    while (*s != 0) {
      if (*s == '<') {
        ++s;

      // also entered from the PCDATA branch below, once it has consumed '<'
      LOC_TAG:
        if (PUGI__IS_CHARTYPE(*s, ct_start_symbol))  // '<#...'
        {
          PUGI__PUSHNODE(node_element);  // Append a new node to the tree.

          cursor->name = s;

          PUGI__SCANWHILE_UNROLL(
              PUGI__IS_CHARTYPE(ss, ct_symbol));  // Scan for a terminator.
          PUGI__ENDSEG();  // Save char in 'ch', terminate & step over.

          if (ch == '>') {
            // end of tag
          } else if (PUGI__IS_CHARTYPE(ch, ct_space)) {
          // also entered after parse_question() left the cursor on a
          // declaration node whose attributes still have to be read
          LOC_ATTRIBUTES:
            while (true) {
              PUGI__SKIPWS();  // Eat any whitespace.

              if (PUGI__IS_CHARTYPE(*s, ct_start_symbol))  // <... #...
              {
                xml_attribute_struct* a = append_new_attribute(
                    cursor, *alloc);  // Make space for this attribute.
                if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);

                a->name = s;  // Save the offset.

                PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(
                    ss, ct_symbol));  // Scan for a terminator.
                PUGI__ENDSEG();  // Save char in 'ch', terminate & step over.

                if (PUGI__IS_CHARTYPE(ch, ct_space)) {
                  PUGI__SKIPWS();  // Eat any whitespace.

                  ch = *s;
                  ++s;
                }

                if (ch == '=')  // '<... #=...'
                {
                  PUGI__SKIPWS();  // Eat any whitespace.

                  if (*s == '"' || *s == '\'')  // '<... #="...'
                  {
                    ch = *s;  // Save quote char to avoid breaking on "''" -or-
                              // '""'.
                    ++s;      // Step over the quote.
                    a->value = s;  // Save the offset.

                    s = strconv_attribute(s, ch);

                    if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);

                    // After this line the loop continues from the start;
                    // Whitespaces, / and > are ok, symbols and EOF are wrong,
                    // everything else will be detected
                    if (PUGI__IS_CHARTYPE(*s, ct_start_symbol))
                      PUGI__THROW_ERROR(status_bad_attribute, s);
                  } else
                    PUGI__THROW_ERROR(status_bad_attribute, s);
                } else
                  PUGI__THROW_ERROR(status_bad_attribute, s);
              } else if (*s == '/') {
                ++s;

                if (*s == '>') {
                  PUGI__POPNODE();
                  s++;
                  break;
                } else if (*s == 0 && endch == '>') {
                  // the closing '>' was the character replaced by the
                  // buffer terminator
                  PUGI__POPNODE();
                  break;
                } else
                  PUGI__THROW_ERROR(status_bad_start_element, s);
              } else if (*s == '>') {
                ++s;

                break;
              } else if (*s == 0 && endch == '>') {
                break;
              } else
                PUGI__THROW_ERROR(status_bad_start_element, s);
            }

            // !!!
          } else if (ch == '/')  // '<#.../'
          {
            if (!PUGI__ENDSWITH(*s, '>'))
              PUGI__THROW_ERROR(status_bad_start_element, s);

            PUGI__POPNODE();  // Pop.

            s += (*s == '>');
          } else if (ch == 0) {
            // we stepped over null terminator, backtrack & handle closing tag
            --s;

            if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
          } else
            PUGI__THROW_ERROR(status_bad_start_element, s);
        } else if (*s == '/') {
          ++s;

          mark = s;

          // the closing tag has to repeat the name of the current node
          char_t* name = cursor->name;
          if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);

          while (PUGI__IS_CHARTYPE(*s, ct_symbol)) {
            if (*s++ != *name++)
              PUGI__THROW_ERROR(status_end_element_mismatch, mark);
          }

          if (*name) {
            // the tag name is a proper prefix of the node name; if the only
            // missing character is the one replaced by the buffer terminator,
            // the names do match and the tag merely lacks its '>'
            if (*s == 0 && name[0] == endch && name[1] == 0)
              PUGI__THROW_ERROR(status_bad_end_element, s);
            else
              PUGI__THROW_ERROR(status_end_element_mismatch, mark);
          }

          PUGI__POPNODE();  // Pop.

          PUGI__SKIPWS();

          if (*s == 0) {
            if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
          } else {
            if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
            ++s;
          }
        } else if (*s == '?')  // '<?...'
        {
          s = parse_question(s, cursor, optmsk, endch);
          if (!s) return s;

          assert(cursor);
          if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
        } else if (*s == '!')  // '<!...'
        {
          s = parse_exclamation(s, cursor, optmsk, endch);
          if (!s) return s;
        } else if (*s == 0 && endch == '?')
          PUGI__THROW_ERROR(status_bad_pi, s);
        else
          PUGI__THROW_ERROR(status_unrecognized_tag, s);
      } else {
        mark = s;  // Save this offset while searching for a terminator.

        PUGI__SKIPWS();  // Eat whitespace if no genuine PCDATA here.

        if (*s == '<' || !*s) {
          // We skipped some whitespace characters because otherwise we would
          // take the tag branch instead of PCDATA one
          assert(mark != s);

          if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) ||
              PUGI__OPTSET(parse_trim_pcdata)) {
            // whitespace-only text is dropped
            continue;
          } else if (PUGI__OPTSET(parse_ws_pcdata_single)) {
            // keep it only when it is the sole content of the element
            if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
          }
        }

        // unless trimming is requested, the text starts at the first
        // whitespace character, not after it
        if (!PUGI__OPTSET(parse_trim_pcdata)) s = mark;

        if (cursor->parent || PUGI__OPTSET(parse_fragment)) {
          if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent &&
              !cursor->first_child && !cursor->value) {
            cursor->value = s;  // Save the offset.
          } else {
            PUGI__PUSHNODE(node_pcdata);  // Append a new node on the tree.

            cursor->value = s;  // Save the offset.

            PUGI__POPNODE();  // Pop since this is a standalone.
          }

          s = strconv_pcdata(s);

          if (!*s) break;
        } else {
          // text directly under the root of a non-fragment document is
          // skipped without creating a node
          PUGI__SCANFOR(*s == '<');  // '...<'
          if (!*s) break;

          ++s;
        }

        // We're after '<'
        goto LOC_TAG;
      }
    }

    // check that last tag is closed
    if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);

    return s;
  }
3381
3382#ifdef PUGIXML_WCHAR_MODE
3383 static char_t* parse_skip_bom(char_t* s) {
3384 unsigned int bom = 0xfeff;
3385 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3386 }
3387#else
3388 static char_t* parse_skip_bom(char_t* s) {
3389 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3390 }
3391#endif
3392
3393 static bool has_element_node_siblings(xml_node_struct* node) {
3394 while (node) {
3395 if (PUGI__NODETYPE(node) == node_element) return true;
3396
3397 node = node->next_sibling;
3398 }
3399
3400 return false;
3401 }
3402
3403 static xml_parse_result parse(char_t* buffer, size_t length,
3404 xml_document_struct* xmldoc,
3405 xml_node_struct* root, unsigned int optmsk) {
3406 // early-out for empty documents
3407 if (length == 0)
3408 return make_parse_result(PUGI__OPTSET(parse_fragment)
3409 ? status_ok
3410 : status_no_document_element);
3411
3412 // get last child of the root before parsing
3413 xml_node_struct* last_root_child =
3414 root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3415
3416 // create parser on stack
3417 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3418
3419 // save last character and make buffer zero-terminated (speeds up parsing)
3420 char_t endch = buffer[length - 1];
3421 buffer[length - 1] = 0;
3422
3423 // skip BOM to make sure it does not end up as part of parse output
3424 char_t* buffer_data = parse_skip_bom(buffer);
3425
3426 // perform actual parsing
3427 parser.parse_tree(buffer_data, root, optmsk, endch);
3428
3429 xml_parse_result result = make_parse_result(
3430 parser.error_status,
3431 parser.error_offset ? parser.error_offset - buffer : 0);
3432 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3433
3434 if (result) {
3435 // since we removed last character, we have to handle the only possible
3436 // false positive (stray <)
3437 if (endch == '<')
3438 return make_parse_result(status_unrecognized_tag, length - 1);
3439
3440 // check if there are any element nodes parsed
3441 xml_node_struct* first_root_child_parsed =
3442 last_root_child ? last_root_child->next_sibling + 0
3443 : root->first_child + 0;
3444
3445 if (!PUGI__OPTSET(parse_fragment) &&
3446 !has_element_node_siblings(first_root_child_parsed))
3447 return make_parse_result(status_no_document_element, length - 1);
3448 } else {
3449 // roll back offset if it occurs on a null terminator in the source buffer
3450 if (result.offset > 0 &&
3451 static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3452 result.offset--;
3453 }
3454
3455 return result;
3456 }
3457};
3458
3459// Output facilities
3460PUGI__FN xml_encoding get_write_native_encoding() {
3461#ifdef PUGIXML_WCHAR_MODE
3462 return get_wchar_encoding();
3463#else
3464 return encoding_utf8;
3465#endif
3466}
3467
3468PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) {
3469 // replace wchar encoding with utf implementation
3470 if (encoding == encoding_wchar) return get_wchar_encoding();
3471
3472 // replace utf16 encoding with utf16 with specific endianness
3473 if (encoding == encoding_utf16)
3474 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3475
3476 // replace utf32 encoding with utf32 with specific endianness
3477 if (encoding == encoding_utf32)
3478 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3479
3480 // only do autodetection if no explicit encoding is requested
3481 if (encoding != encoding_auto) return encoding;
3482
3483 // assume utf8 encoding
3484 return encoding_utf8;
3485}
3486
3487template <typename D, typename T>
3488PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest,
3489 const char_t* data, size_t length,
3490 D, T) {
3491 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3492
3493 typename T::value_type end = D::process(
3494 reinterpret_cast<const typename D::type*>(data), length, dest, T());
3495
3496 return static_cast<size_t>(end - dest) * sizeof(*dest);
3497}
3498
3499template <typename D, typename T>
3500PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest,
3501 const char_t* data, size_t length,
3502 D, T, bool opt_swap) {
3503 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3504
3505 typename T::value_type end = D::process(
3506 reinterpret_cast<const typename D::type*>(data), length, dest, T());
3507
3508 if (opt_swap) {
3509 for (typename T::value_type i = dest; i != end; ++i) *i = endian_swap(*i);
3510 }
3511
3512 return static_cast<size_t>(end - dest) * sizeof(*dest);
3513}
3514
3515#ifdef PUGIXML_WCHAR_MODE
3516PUGI__FN size_t get_valid_length(const char_t* data, size_t length) {
3517 if (length < 1) return 0;
3518
3519 // discard last character if it's the lead of a surrogate pair
3520 return (sizeof(wchar_t) == 2 &&
3521 static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) -
3522 0xD800) < 0x400)
3523 ? length - 1
3524 : length;
3525}
3526
3527PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8,
3528 uint16_t* r_u16, uint32_t* r_u32,
3529 const char_t* data, size_t length,
3530 xml_encoding encoding) {
3531 // only endian-swapping is required
3532 if (need_endian_swap_utf(encoding, get_wchar_encoding())) {
3533 convert_wchar_endian_swap(r_char, data, length);
3534
3535 return length * sizeof(char_t);
3536 }
3537
3538 // convert to utf8
3539 if (encoding == encoding_utf8)
3540 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(),
3541 utf8_writer());
3542
3543 // convert to utf16
3544 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
3545 xml_encoding native_encoding =
3546 is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3547
3548 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(),
3549 utf16_writer(),
3550 native_encoding != encoding);
3551 }
3552
3553 // convert to utf32
3554 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
3555 xml_encoding native_encoding =
3556 is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3557
3558 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(),
3559 utf32_writer(),
3560 native_encoding != encoding);
3561 }
3562
3563 // convert to latin1
3564 if (encoding == encoding_latin1)
3565 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(),
3566 latin1_writer());
3567
3568 assert(false && "Invalid encoding");
3569 return 0;
3570}
3571#else
3572PUGI__FN size_t get_valid_length(const char_t* data, size_t length) {
3573 if (length < 5) return 0;
3574
3575 for (size_t i = 1; i <= 4; ++i) {
3576 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3577
3578 // either a standalone character or a leading one
3579 if ((ch & 0xc0) != 0x80) return length - i;
3580 }
3581
3582 // there are four non-leading characters at the end, sequence tail is broken
3583 // so might as well process the whole chunk
3584 return length;
3585}
3586
3587PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8,
3588 uint16_t* r_u16, uint32_t* r_u32,
3589 const char_t* data, size_t length,
3590 xml_encoding encoding) {
3591 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) {
3592 xml_encoding native_encoding =
3593 is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3594
3595 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(),
3596 utf16_writer(),
3597 native_encoding != encoding);
3598 }
3599
3600 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) {
3601 xml_encoding native_encoding =
3602 is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3603
3604 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(),
3605 utf32_writer(),
3606 native_encoding != encoding);
3607 }
3608
3609 if (encoding == encoding_latin1)
3610 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(),
3611 latin1_writer());
3612
3613 assert(false && "Invalid encoding");
3614 return 0;
3615}
3616#endif
3617
3620 xml_buffered_writer& operator=(const xml_buffered_writer&);
3621
3622public:
3623 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding)
3624 : writer(writer_),
3625 bufsize(0),
3626 encoding(get_write_encoding(user_encoding)) {
3627 PUGI__STATIC_ASSERT(bufcapacity >= 8);
3628 }
3629
3630 size_t flush() {
3631 flush(buffer, bufsize);
3632 bufsize = 0;
3633 return 0;
3634 }
3635
3636 void flush(const char_t* data, size_t size) {
3637 if (size == 0) return;
3638
3639 // fast path, just write data
3640 if (encoding == get_write_native_encoding())
3641 writer.write(data, size * sizeof(char_t));
3642 else {
3643 // convert chunk
3644 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8,
3645 scratch.data_u16, scratch.data_u32,
3646 data, size, encoding);
3647 assert(result <= sizeof(scratch));
3648
3649 // write data
3650 writer.write(scratch.data_u8, result);
3651 }
3652 }
3653
3654 void write_direct(const char_t* data, size_t length) {
3655 // flush the remaining buffer contents
3656 flush();
3657
3658 // handle large chunks
3659 if (length > bufcapacity) {
3660 if (encoding == get_write_native_encoding()) {
3661 // fast path, can just write data chunk
3662 writer.write(data, length * sizeof(char_t));
3663 return;
3664 }
3665
3666 // need to convert in suitable chunks
3667 while (length > bufcapacity) {
3668 // get chunk size by selecting such number of characters that are
3669 // guaranteed to fit into scratch buffer and form a complete codepoint
3670 // sequence (i.e. discard start of last codepoint if necessary)
3671 size_t chunk_size = get_valid_length(data, bufcapacity);
3672 assert(chunk_size);
3673
3674 // convert chunk and write
3675 flush(data, chunk_size);
3676
3677 // iterate
3678 data += chunk_size;
3679 length -= chunk_size;
3680 }
3681
3682 // small tail is copied below
3683 bufsize = 0;
3684 }
3685
3686 memcpy(buffer + bufsize, data, length * sizeof(char_t));
3687 bufsize += length;
3688 }
3689
3690 void write_buffer(const char_t* data, size_t length) {
3691 size_t offset = bufsize;
3692
3693 if (offset + length <= bufcapacity) {
3694 memcpy(buffer + offset, data, length * sizeof(char_t));
3695 bufsize = offset + length;
3696 } else {
3697 write_direct(data, length);
3698 }
3699 }
3700
3701 void write_string(const char_t* data) {
3702 // write the part of the string that fits in the buffer
3703 size_t offset = bufsize;
3704
3705 while (*data && offset < bufcapacity) buffer[offset++] = *data++;
3706
3707 // write the rest
3708 if (offset < bufcapacity) {
3709 bufsize = offset;
3710 } else {
3711 // backtrack a bit if we have split the codepoint
3712 size_t length = offset - bufsize;
3713 size_t extra = length - get_valid_length(data - length, length);
3714
3715 bufsize = offset - extra;
3716
3717 write_direct(data - extra, strlength(data) + extra);
3718 }
3719 }
3720
3721 void write(char_t d0) {
3722 size_t offset = bufsize;
3723 if (offset > bufcapacity - 1) offset = flush();
3724
3725 buffer[offset + 0] = d0;
3726 bufsize = offset + 1;
3727 }
3728
3729 void write(char_t d0, char_t d1) {
3730 size_t offset = bufsize;
3731 if (offset > bufcapacity - 2) offset = flush();
3732
3733 buffer[offset + 0] = d0;
3734 buffer[offset + 1] = d1;
3735 bufsize = offset + 2;
3736 }
3737
3738 void write(char_t d0, char_t d1, char_t d2) {
3739 size_t offset = bufsize;
3740 if (offset > bufcapacity - 3) offset = flush();
3741
3742 buffer[offset + 0] = d0;
3743 buffer[offset + 1] = d1;
3744 buffer[offset + 2] = d2;
3745 bufsize = offset + 3;
3746 }
3747
3748 void write(char_t d0, char_t d1, char_t d2, char_t d3) {
3749 size_t offset = bufsize;
3750 if (offset > bufcapacity - 4) offset = flush();
3751
3752 buffer[offset + 0] = d0;
3753 buffer[offset + 1] = d1;
3754 buffer[offset + 2] = d2;
3755 buffer[offset + 3] = d3;
3756 bufsize = offset + 4;
3757 }
3758
3759 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) {
3760 size_t offset = bufsize;
3761 if (offset > bufcapacity - 5) offset = flush();
3762
3763 buffer[offset + 0] = d0;
3764 buffer[offset + 1] = d1;
3765 buffer[offset + 2] = d2;
3766 buffer[offset + 3] = d3;
3767 buffer[offset + 4] = d4;
3768 bufsize = offset + 5;
3769 }
3770
3771 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) {
3772 size_t offset = bufsize;
3773 if (offset > bufcapacity - 6) offset = flush();
3774
3775 buffer[offset + 0] = d0;
3776 buffer[offset + 1] = d1;
3777 buffer[offset + 2] = d2;
3778 buffer[offset + 3] = d3;
3779 buffer[offset + 4] = d4;
3780 buffer[offset + 5] = d5;
3781 bufsize = offset + 6;
3782 }
3783
3784 // utf8 maximum expansion: x4 (-> utf32)
3785 // utf16 maximum expansion: x2 (-> utf32)
3786 // utf32 maximum expansion: x1
3787 enum {
3788 bufcapacitybytes =
3789#ifdef PUGIXML_MEMORY_OUTPUT_STACK
3790 PUGIXML_MEMORY_OUTPUT_STACK
3791#else
3792 10240
3793#endif
3794 ,
3795 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3796 };
3797
3798 char_t buffer[bufcapacity];
3799
3800 union {
3801 uint8_t data_u8[4 * bufcapacity];
3802 uint16_t data_u16[2 * bufcapacity];
3803 uint32_t data_u32[bufcapacity];
3804 char_t data_char[bufcapacity];
3805 } scratch;
3806
3807 xml_writer& writer;
3808 size_t bufsize;
3809 xml_encoding encoding;
3810};
3811
3812PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s,
3813 chartypex_t type) {
3814 while (*s) {
3815 const char_t* prev = s;
3816
3817 // While *s is a usual symbol
3818 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3819
3820 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3821
3822 switch (*s) {
3823 case 0:
3824 break;
3825 case '&':
3826 writer.write('&', 'a', 'm', 'p', ';');
3827 ++s;
3828 break;
3829 case '<':
3830 writer.write('&', 'l', 't', ';');
3831 ++s;
3832 break;
3833 case '>':
3834 writer.write('&', 'g', 't', ';');
3835 ++s;
3836 break;
3837 case '"':
3838 writer.write('&', 'q', 'u', 'o', 't', ';');
3839 ++s;
3840 break;
3841 default: // s is not a usual symbol
3842 {
3843 unsigned int ch = static_cast<unsigned int>(*s++);
3844 assert(ch < 32);
3845
3846 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'),
3847 static_cast<char_t>((ch % 10) + '0'), ';');
3848 }
3849 }
3850 }
3851}
3852
3853PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s,
3854 chartypex_t type, unsigned int flags) {
3855 if (flags & format_no_escapes)
3856 writer.write_string(s);
3857 else
3858 text_output_escaped(writer, s, type);
3859}
3860
3861PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) {
3862 do {
3863 writer.write('<', '!', '[', 'C', 'D');
3864 writer.write('A', 'T', 'A', '[');
3865
3866 const char_t* prev = s;
3867
3868 // look for ]]> sequence - we can't output it as is since it terminates
3869 // CDATA
3870 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3871
3872 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3873 if (*s) s += 2;
3874
3875 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3876
3877 writer.write(']', ']', '>');
3878 } while (*s);
3879}
3880
3881PUGI__FN void text_output_indent(xml_buffered_writer& writer,
3882 const char_t* indent, size_t indent_length,
3883 unsigned int depth) {
3884 switch (indent_length) {
3885 case 1: {
3886 for (unsigned int i = 0; i < depth; ++i) writer.write(indent[0]);
3887 break;
3888 }
3889
3890 case 2: {
3891 for (unsigned int i = 0; i < depth; ++i)
3892 writer.write(indent[0], indent[1]);
3893 break;
3894 }
3895
3896 case 3: {
3897 for (unsigned int i = 0; i < depth; ++i)
3898 writer.write(indent[0], indent[1], indent[2]);
3899 break;
3900 }
3901
3902 case 4: {
3903 for (unsigned int i = 0; i < depth; ++i)
3904 writer.write(indent[0], indent[1], indent[2], indent[3]);
3905 break;
3906 }
3907
3908 default: {
3909 for (unsigned int i = 0; i < depth; ++i)
3910 writer.write_buffer(indent, indent_length);
3911 }
3912 }
3913}
3914
3915PUGI__FN void node_output_comment(xml_buffered_writer& writer,
3916 const char_t* s) {
3917 writer.write('<', '!', '-', '-');
3918
3919 while (*s) {
3920 const char_t* prev = s;
3921
3922 // look for -\0 or -- sequence - we can't output it since -- is illegal in
3923 // comment body
3924 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
3925
3926 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3927
3928 if (*s) {
3929 assert(*s == '-');
3930
3931 writer.write('-', ' ');
3932 ++s;
3933 }
3934 }
3935
3936 writer.write('-', '-', '>');
3937}
3938
3939PUGI__FN void node_output_pi_value(xml_buffered_writer& writer,
3940 const char_t* s) {
3941 while (*s) {
3942 const char_t* prev = s;
3943
3944 // look for ?> sequence - we can't output it since ?> terminates PI
3945 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
3946
3947 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3948
3949 if (*s) {
3950 assert(s[0] == '?' && s[1] == '>');
3951
3952 writer.write('?', ' ', '>');
3953 s += 2;
3954 }
3955 }
3956}
3957
3958PUGI__FN void node_output_attributes(xml_buffered_writer& writer,
3959 xml_node_struct* node,
3960 const char_t* indent, size_t indent_length,
3961 unsigned int flags, unsigned int depth) {
3962 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3963
3964 for (xml_attribute_struct* a = node->first_attribute; a;
3965 a = a->next_attribute) {
3966 if ((flags & (format_indent_attributes | format_raw)) ==
3967 format_indent_attributes) {
3968 writer.write('\n');
3969
3970 text_output_indent(writer, indent, indent_length, depth + 1);
3971 } else {
3972 writer.write(' ');
3973 }
3974
3975 writer.write_string(a->name ? a->name + 0 : default_name);
3976 writer.write('=', '"');
3977
3978 if (a->value) text_output(writer, a->value, ctx_special_attr, flags);
3979
3980 writer.write('"');
3981 }
3982}
3983
3984PUGI__FN bool node_output_start(xml_buffered_writer& writer,
3985 xml_node_struct* node, const char_t* indent,
3986 size_t indent_length, unsigned int flags,
3987 unsigned int depth) {
3988 const char_t* default_name = PUGIXML_TEXT(":anonymous");
3989 const char_t* name = node->name ? node->name + 0 : default_name;
3990
3991 writer.write('<');
3992 writer.write_string(name);
3993
3994 if (node->first_attribute)
3995 node_output_attributes(writer, node, indent, indent_length, flags, depth);
3996
3997 // element nodes can have value if parse_embed_pcdata was used
3998 if (!node->value) {
3999 if (!node->first_child) {
4000 if (flags & format_no_empty_element_tags) {
4001 writer.write('>', '<', '/');
4002 writer.write_string(name);
4003 writer.write('>');
4004
4005 return false;
4006 } else {
4007 if ((flags & format_raw) == 0) writer.write(' ');
4008
4009 writer.write('/', '>');
4010
4011 return false;
4012 }
4013 } else {
4014 writer.write('>');
4015
4016 return true;
4017 }
4018 } else {
4019 writer.write('>');
4020
4021 text_output(writer, node->value, ctx_special_pcdata, flags);
4022
4023 if (!node->first_child) {
4024 writer.write('<', '/');
4025 writer.write_string(name);
4026 writer.write('>');
4027
4028 return false;
4029 } else {
4030 return true;
4031 }
4032 }
4033}
4034
4035PUGI__FN void node_output_end(xml_buffered_writer& writer,
4036 xml_node_struct* node) {
4037 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4038 const char_t* name = node->name ? node->name + 0 : default_name;
4039
4040 writer.write('<', '/');
4041 writer.write_string(name);
4042 writer.write('>');
4043}
4044
4045PUGI__FN void node_output_simple(xml_buffered_writer& writer,
4046 xml_node_struct* node, unsigned int flags) {
4047 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4048
4049 switch (PUGI__NODETYPE(node)) {
4050 case node_pcdata:
4051 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""),
4052 ctx_special_pcdata, flags);
4053 break;
4054
4055 case node_cdata:
4056 text_output_cdata(writer,
4057 node->value ? node->value + 0 : PUGIXML_TEXT(""));
4058 break;
4059
4060 case node_comment:
4061 node_output_comment(writer,
4062 node->value ? node->value + 0 : PUGIXML_TEXT(""));
4063 break;
4064
4065 case node_pi:
4066 writer.write('<', '?');
4067 writer.write_string(node->name ? node->name + 0 : default_name);
4068
4069 if (node->value) {
4070 writer.write(' ');
4071 node_output_pi_value(writer, node->value);
4072 }
4073
4074 writer.write('?', '>');
4075 break;
4076
4077 case node_declaration:
4078 writer.write('<', '?');
4079 writer.write_string(node->name ? node->name + 0 : default_name);
4080 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0,
4081 flags | format_raw, 0);
4082 writer.write('?', '>');
4083 break;
4084
4085 case node_doctype:
4086 writer.write('<', '!', 'D', 'O', 'C');
4087 writer.write('T', 'Y', 'P', 'E');
4088
4089 if (node->value) {
4090 writer.write(' ');
4091 writer.write_string(node->value);
4092 }
4093
4094 writer.write('>');
4095 break;
4096
4097 default:
4098 assert(false && "Invalid node type");
4099 }
4100}
4101
// Bit flags used by node_output() to decide what precedes the next node.
enum indent_flags_t {
  indent_newline = 1,  // write a line break first
  indent_indent = 2    // write indentation first
};
4103
4104PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root,
4105 const char_t* indent, unsigned int flags,
4106 unsigned int depth) {
4107 size_t indent_length =
4108 ((flags & (format_indent | format_indent_attributes)) &&
4109 (flags & format_raw) == 0)
4110 ? strlength(indent)
4111 : 0;
4112 unsigned int indent_flags = indent_indent;
4113
4114 xml_node_struct* node = root;
4115
4116 do {
4117 assert(node);
4118
4119 // begin writing current node
4120 if (PUGI__NODETYPE(node) == node_pcdata ||
4121 PUGI__NODETYPE(node) == node_cdata) {
4122 node_output_simple(writer, node, flags);
4123
4124 indent_flags = 0;
4125 } else {
4126 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4127 writer.write('\n');
4128
4129 if ((indent_flags & indent_indent) && indent_length)
4130 text_output_indent(writer, indent, indent_length, depth);
4131
4132 if (PUGI__NODETYPE(node) == node_element) {
4133 indent_flags = indent_newline | indent_indent;
4134
4135 if (node_output_start(writer, node, indent, indent_length, flags,
4136 depth)) {
4137 // element nodes can have value if parse_embed_pcdata was used
4138 if (node->value) indent_flags = 0;
4139
4140 node = node->first_child;
4141 depth++;
4142 continue;
4143 }
4144 } else if (PUGI__NODETYPE(node) == node_document) {
4145 indent_flags = indent_indent;
4146
4147 if (node->first_child) {
4148 node = node->first_child;
4149 continue;
4150 }
4151 } else {
4152 node_output_simple(writer, node, flags);
4153
4154 indent_flags = indent_newline | indent_indent;
4155 }
4156 }
4157
4158 // continue to the next node
4159 while (node != root) {
4160 if (node->next_sibling) {
4161 node = node->next_sibling;
4162 break;
4163 }
4164
4165 node = node->parent;
4166
4167 // write closing node
4168 if (PUGI__NODETYPE(node) == node_element) {
4169 depth--;
4170
4171 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4172 writer.write('\n');
4173
4174 if ((indent_flags & indent_indent) && indent_length)
4175 text_output_indent(writer, indent, indent_length, depth);
4176
4177 node_output_end(writer, node);
4178
4179 indent_flags = indent_newline | indent_indent;
4180 }
4181 }
4182 } while (node != root);
4183
4184 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4185 writer.write('\n');
4186}
4187
4188PUGI__FN bool has_declaration(xml_node_struct* node) {
4189 for (xml_node_struct* child = node->first_child; child;
4190 child = child->next_sibling) {
4191 xml_node_type type = PUGI__NODETYPE(child);
4192
4193 if (type == node_declaration) return true;
4194 if (type == node_element) return false;
4195 }
4196
4197 return false;
4198}
4199
4200PUGI__FN bool is_attribute_of(xml_attribute_struct* attr,
4201 xml_node_struct* node) {
4202 for (xml_attribute_struct* a = node->first_attribute; a;
4203 a = a->next_attribute)
4204 if (a == attr) return true;
4205
4206 return false;
4207}
4208
4209PUGI__FN bool allow_insert_attribute(xml_node_type parent) {
4210 return parent == node_element || parent == node_declaration;
4211}
4212
4213PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) {
4214 if (parent != node_document && parent != node_element) return false;
4215 if (child == node_document || child == node_null) return false;
4216 if (parent != node_document &&
4217 (child == node_declaration || child == node_doctype))
4218 return false;
4219
4220 return true;
4221}
4222
4223PUGI__FN bool allow_move(xml_node parent, xml_node child) {
4224 // check that child can be a child of parent
4225 if (!allow_insert_child(parent.type(), child.type())) return false;
4226
4227 // check that node is not moved between documents
4228 if (parent.root() != child.root()) return false;
4229
4230 // check that new parent is not in the child subtree
4231 xml_node cur = parent;
4232
4233 while (cur) {
4234 if (cur == child) return false;
4235
4236 cur = cur.parent();
4237 }
4238
4239 return true;
4240}
4241
4242template <typename String, typename Header>
4243PUGI__FN void node_copy_string(String& dest, Header& header,
4244 uintptr_t header_mask, char_t* source,
4245 Header& source_header, xml_allocator* alloc) {
4246 assert(!dest && (header & header_mask) == 0);
4247
4248 if (source) {
4249 if (alloc && (source_header & header_mask) == 0) {
4250 dest = source;
4251
4252 // since strcpy_insitu can reuse document buffer memory we need to mark
4253 // both source and dest as shared
4254 header |= xml_memory_page_contents_shared_mask;
4255 source_header |= xml_memory_page_contents_shared_mask;
4256 } else
4257 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4258 }
4259}
4260
4261PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn,
4262 xml_allocator* shared_alloc) {
4263 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask,
4264 sn->name, sn->header, shared_alloc);
4265 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask,
4266 sn->value, sn->header, shared_alloc);
4267
4268 for (xml_attribute_struct* sa = sn->first_attribute; sa;
4269 sa = sa->next_attribute) {
4270 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4271
4272 if (da) {
4273 node_copy_string(da->name, da->header,
4274 xml_memory_page_name_allocated_mask, sa->name,
4275 sa->header, shared_alloc);
4276 node_copy_string(da->value, da->header,
4277 xml_memory_page_value_allocated_mask, sa->value,
4278 sa->header, shared_alloc);
4279 }
4280 }
4281}
4282
4283PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) {
4284 xml_allocator& alloc = get_allocator(dn);
4285 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4286
4287 node_copy_contents(dn, sn, shared_alloc);
4288
4289 xml_node_struct* dit = dn;
4290 xml_node_struct* sit = sn->first_child;
4291
4292 while (sit && sit != sn) {
4293 if (sit != dn) {
4294 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4295
4296 if (copy) {
4297 node_copy_contents(copy, sit, shared_alloc);
4298
4299 if (sit->first_child) {
4300 dit = copy;
4301 sit = sit->first_child;
4302 continue;
4303 }
4304 }
4305 }
4306
4307 // continue to the next node
4308 do {
4309 if (sit->next_sibling) {
4310 sit = sit->next_sibling;
4311 break;
4312 }
4313
4314 sit = sit->parent;
4315 dit = dit->parent;
4316 } while (sit != sn);
4317 }
4318}
4319
4320PUGI__FN void node_copy_attribute(xml_attribute_struct* da,
4321 xml_attribute_struct* sa) {
4322 xml_allocator& alloc = get_allocator(da);
4323 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4324
4325 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask,
4326 sa->name, sa->header, shared_alloc);
4327 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask,
4328 sa->value, sa->header, shared_alloc);
4329}
4330
4331inline bool is_text_node(xml_node_struct* node) {
4332 xml_node_type type = PUGI__NODETYPE(node);
4333
4334 return type == node_pcdata || type == node_cdata;
4335}
4336
4337// get value with conversion functions
4338template <typename U>
4339U string_to_integer(const char_t* value, U minneg, U maxpos) {
4340 U result = 0;
4341 const char_t* s = value;
4342
4343 while (PUGI__IS_CHARTYPE(*s, ct_space)) s++;
4344
4345 bool negative = (*s == '-');
4346
4347 s += (*s == '+' || *s == '-');
4348
4349 bool overflow = false;
4350
4351 if (s[0] == '0' && (s[1] | ' ') == 'x') {
4352 s += 2;
4353
4354 // since overflow detection relies on length of the sequence skip leading
4355 // zeros
4356 while (*s == '0') s++;
4357
4358 const char_t* start = s;
4359
4360 for (;;) {
4361 if (static_cast<unsigned>(*s - '0') < 10)
4362 result = result * 16 + (*s - '0');
4363 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4364 result = result * 16 + ((*s | ' ') - 'a' + 10);
4365 else
4366 break;
4367
4368 s++;
4369 }
4370
4371 size_t digits = static_cast<size_t>(s - start);
4372
4373 overflow = digits > sizeof(U) * 2;
4374 } else {
4375 // since overflow detection relies on length of the sequence skip leading
4376 // zeros
4377 while (*s == '0') s++;
4378
4379 const char_t* start = s;
4380
4381 for (;;) {
4382 if (static_cast<unsigned>(*s - '0') < 10)
4383 result = result * 10 + (*s - '0');
4384 else
4385 break;
4386
4387 s++;
4388 }
4389
4390 size_t digits = static_cast<size_t>(s - start);
4391
4392 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4393
4394 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4395 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4396 const size_t high_bit = sizeof(U) * 8 - 1;
4397
4398 overflow =
4399 digits >= max_digits10 &&
4400 !(digits == max_digits10 &&
4401 (*start < max_lead || (*start == max_lead && result >> high_bit)));
4402 }
4403
4404 if (negative)
4405 return (overflow || result > minneg) ? 0 - minneg : 0 - result;
4406 else
4407 return (overflow || result > maxpos) ? maxpos : result;
4408}
4409
4410PUGI__FN int get_value_int(const char_t* value) {
4411 return string_to_integer<unsigned int>(
4412 value, 0 - static_cast<unsigned int>(INT_MIN), INT_MAX);
4413}
4414
4415PUGI__FN unsigned int get_value_uint(const char_t* value) {
4416 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4417}
4418
4419PUGI__FN double get_value_double(const char_t* value) {
4420#ifdef PUGIXML_WCHAR_MODE
4421 return wcstod(value, 0);
4422#else
4423 return strtod(value, 0);
4424#endif
4425}
4426
4427PUGI__FN float get_value_float(const char_t* value) {
4428#ifdef PUGIXML_WCHAR_MODE
4429 return static_cast<float>(wcstod(value, 0));
4430#else
4431 return static_cast<float>(strtod(value, 0));
4432#endif
4433}
4434
4435PUGI__FN bool get_value_bool(const char_t* value) {
4436 // only look at first char
4437 char_t first = *value;
4438
4439 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4440 return (first == '1' || first == 't' || first == 'T' || first == 'y' ||
4441 first == 'Y');
4442}
4443
4444#ifdef PUGIXML_HAS_LONG_LONG
4445PUGI__FN long long get_value_llong(const char_t* value) {
4446 return string_to_integer<unsigned long long>(
4447 value, 0 - static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4448}
4449
4450PUGI__FN unsigned long long get_value_ullong(const char_t* value) {
4451 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4452}
4453#endif
4454
4455template <typename U>
4456PUGI__FN char_t* integer_to_string(char_t* begin, char_t* end, U value,
4457 bool negative) {
4458 char_t* result = end - 1;
4459 U rest = negative ? 0 - value : value;
4460
4461 do {
4462 *result-- = static_cast<char_t>('0' + (rest % 10));
4463 rest /= 10;
4464 } while (rest);
4465
4466 assert(result >= begin);
4467 (void)begin;
4468
4469 *result = '-';
4470
4471 return result + !negative;
4472}
4473
4474// set value with conversion functions
4475template <typename String, typename Header>
4476PUGI__FN bool set_value_ascii(String& dest, Header& header,
4477 uintptr_t header_mask, char* buf) {
4478#ifdef PUGIXML_WCHAR_MODE
4479 char_t wbuf[128];
4480 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4481
4482 size_t offset = 0;
4483 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4484
4485 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4486#else
4487 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4488#endif
4489}
4490
4491template <typename U, typename String, typename Header>
4492PUGI__FN bool set_value_integer(String& dest, Header& header,
4493 uintptr_t header_mask, U value, bool negative) {
4494 char_t buf[64];
4495 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4496 char_t* begin = integer_to_string(buf, end, value, negative);
4497
4498 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4499}
4500
4501template <typename String, typename Header>
4502PUGI__FN bool set_value_convert(String& dest, Header& header,
4503 uintptr_t header_mask, float value) {
4504 char buf[128];
4505 sprintf(buf, "%.9g", value);
4506
4507 return set_value_ascii(dest, header, header_mask, buf);
4508}
4509
4510template <typename String, typename Header>
4511PUGI__FN bool set_value_convert(String& dest, Header& header,
4512 uintptr_t header_mask, double value) {
4513 char buf[128];
4514 sprintf(buf, "%.17g", value);
4515
4516 return set_value_ascii(dest, header, header_mask, buf);
4517}
4518
4519template <typename String, typename Header>
4520PUGI__FN bool set_value_bool(String& dest, Header& header,
4521 uintptr_t header_mask, bool value) {
4522 return strcpy_insitu(dest, header, header_mask,
4523 value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"),
4524 value ? 4 : 5);
4525}
4526
4527PUGI__FN xml_parse_result load_buffer_impl(
4528 xml_document_struct* doc, xml_node_struct* root, void* contents,
4529 size_t size, unsigned int options, xml_encoding encoding, bool is_mutable,
4530 bool own, char_t** out_buffer) {
4531 // check input buffer
4532 if (!contents && size) return make_parse_result(status_io_error);
4533
4534 // get actual encoding
4535 xml_encoding buffer_encoding =
4536 impl::get_buffer_encoding(encoding, contents, size);
4537
4538 // get private buffer
4539 char_t* buffer = 0;
4540 size_t length = 0;
4541
4542 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size,
4543 is_mutable))
4544 return impl::make_parse_result(status_out_of_memory);
4545
4546 // delete original buffer if we performed a conversion
4547 if (own && buffer != contents && contents)
4548 impl::xml_memory::deallocate(contents);
4549
4550 // grab onto buffer if it's our buffer, user is responsible for deallocating
4551 // contents himself
4552 if (own || buffer != contents) *out_buffer = buffer;
4553
4554 // store buffer for offset_debug
4555 doc->buffer = buffer;
4556
4557 // parse
4558 xml_parse_result res =
4559 impl::xml_parser::parse(buffer, length, doc, root, options);
4560
4561 // remember encoding
4562 res.encoding = buffer_encoding;
4563
4564 return res;
4565}
4566
4567// we need to get length of entire file to load it in memory; the only
4568// (relatively) sane way to do it is via seek/tell trick
4569PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) {
4570#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && \
4571 !defined(_WIN32_WCE)
4572 // there are 64-bit versions of fseek/ftell, let's use them
4573 typedef __int64 length_type;
4574
4575 _fseeki64(file, 0, SEEK_END);
4576 length_type length = _ftelli64(file);
4577 _fseeki64(file, 0, SEEK_SET);
4578#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && \
4579 (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4580 // there are 64-bit versions of fseek/ftell, let's use them
4581 typedef off64_t length_type;
4582
4583 fseeko64(file, 0, SEEK_END);
4584 length_type length = ftello64(file);
4585 fseeko64(file, 0, SEEK_SET);
4586#else
4587 // if this is a 32-bit OS, long is enough; if this is a unix system, long is
4588 // 64-bit, which is enough; otherwise we can't do anything anyway.
4589 typedef long length_type;
4590
4591 fseek(file, 0, SEEK_END);
4592 length_type length = ftell(file);
4593 fseek(file, 0, SEEK_SET);
4594#endif
4595
4596 // check for I/O errors
4597 if (length < 0) return status_io_error;
4598
4599 // check for overflow
4600 size_t result = static_cast<size_t>(length);
4601
4602 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4603
4604 // finalize
4605 out_result = result;
4606
4607 return status_ok;
4608}
4609
4610// This function assumes that buffer has extra sizeof(char_t) writable bytes
4611// after size
4612PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size,
4613 xml_encoding encoding) {
4614 // We only need to zero-terminate if encoding conversion does not do it for us
4615#ifdef PUGIXML_WCHAR_MODE
4616 xml_encoding wchar_encoding = get_wchar_encoding();
4617
4618 if (encoding == wchar_encoding ||
4619 need_endian_swap_utf(encoding, wchar_encoding)) {
4620 size_t length = size / sizeof(char_t);
4621
4622 static_cast<char_t*>(buffer)[length] = 0;
4623 return (length + 1) * sizeof(char_t);
4624 }
4625#else
4626 if (encoding == encoding_utf8) {
4627 static_cast<char*>(buffer)[size] = 0;
4628 return size + 1;
4629 }
4630#endif
4631
4632 return size;
4633}
4634
4635PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file,
4636 unsigned int options,
4637 xml_encoding encoding,
4638 char_t** out_buffer) {
4639 if (!file) return make_parse_result(status_file_not_found);
4640
4641 // get file size (can result in I/O errors)
4642 size_t size = 0;
4643 xml_parse_status size_status = get_file_size(file, size);
4644 if (size_status != status_ok) return make_parse_result(size_status);
4645
4646 size_t max_suffix_size = sizeof(char_t);
4647
4648 // allocate buffer for the whole file
4649 char* contents =
4650 static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4651 if (!contents) return make_parse_result(status_out_of_memory);
4652
4653 // read file in memory
4654 size_t read_size = fread(contents, 1, size, file);
4655
4656 if (read_size != size) {
4657 xml_memory::deallocate(contents);
4658 return make_parse_result(status_io_error);
4659 }
4660
4661 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4662
4663 return load_buffer_impl(doc, doc, contents,
4664 zero_terminate_buffer(contents, size, real_encoding),
4665 options, real_encoding, true, true, out_buffer);
4666}
4667
4668PUGI__FN void close_file(FILE* file) { fclose(file); }
4669
4670#ifndef PUGIXML_NO_STL
4671template <typename T>
4673 static xml_stream_chunk* create() {
4674 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4675 if (!memory) return 0;
4676
4677 return new (memory) xml_stream_chunk();
4678 }
4679
4680 static void destroy(xml_stream_chunk* chunk) {
4681 // free chunk chain
4682 while (chunk) {
4683 xml_stream_chunk* next_ = chunk->next;
4684
4685 xml_memory::deallocate(chunk);
4686
4687 chunk = next_;
4688 }
4689 }
4690
4691 xml_stream_chunk() : next(0), size(0) {}
4692
4693 xml_stream_chunk* next;
4694 size_t size;
4695
4696 T data[xml_memory_page_size / sizeof(T)];
4697};
4698
4699template <typename T>
4700PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream,
4701 void** out_buffer,
4702 size_t* out_size) {
4704
4705 // read file to a chunk list
4706 size_t total = 0;
4707 xml_stream_chunk<T>* last = 0;
4708
4709 while (!stream.eof()) {
4710 // allocate new chunk
4712 if (!chunk) return status_out_of_memory;
4713
4714 // append chunk to list
4715 if (last)
4716 last = last->next = chunk;
4717 else
4718 chunks.data = last = chunk;
4719
4720 // read data to chunk
4721 stream.read(chunk->data,
4722 static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4723 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4724
4725 // read may set failbit | eofbit in case gcount() is less than read length,
4726 // so check for other I/O errors
4727 if (stream.bad() || (!stream.eof() && stream.fail()))
4728 return status_io_error;
4729
4730 // guard against huge files (chunk size is small enough to make this
4731 // overflow check work)
4732 if (total + chunk->size < total) return status_out_of_memory;
4733 total += chunk->size;
4734 }
4735
4736 size_t max_suffix_size = sizeof(char_t);
4737
4738 // copy chunk list to a contiguous buffer
4739 char* buffer =
4740 static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4741 if (!buffer) return status_out_of_memory;
4742
4743 char* write = buffer;
4744
4745 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) {
4746 assert(write + chunk->size <= buffer + total);
4747 memcpy(write, chunk->data, chunk->size);
4748 write += chunk->size;
4749 }
4750
4751 assert(write == buffer + total);
4752
4753 // return buffer
4754 *out_buffer = buffer;
4755 *out_size = total;
4756
4757 return status_ok;
4758}
4759
4760template <typename T>
4761PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream,
4762 void** out_buffer,
4763 size_t* out_size) {
4764 // get length of remaining data in stream
4765 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4766 stream.seekg(0, std::ios::end);
4767 std::streamoff length = stream.tellg() - pos;
4768 stream.seekg(pos);
4769
4770 if (stream.fail() || pos < 0) return status_io_error;
4771
4772 // guard against huge files
4773 size_t read_length = static_cast<size_t>(length);
4774
4775 if (static_cast<std::streamsize>(read_length) != length || length < 0)
4776 return status_out_of_memory;
4777
4778 size_t max_suffix_size = sizeof(char_t);
4779
4780 // read stream data into memory (guard against stream exceptions with buffer
4781 // holder)
4782 auto_deleter<void> buffer(
4783 xml_memory::allocate(read_length * sizeof(T) + max_suffix_size),
4784 xml_memory::deallocate);
4785 if (!buffer.data) return status_out_of_memory;
4786
4787 stream.read(static_cast<T*>(buffer.data),
4788 static_cast<std::streamsize>(read_length));
4789
4790 // read may set failbit | eofbit in case gcount() is less than read_length
4791 // (i.e. line ending conversion), so check for other I/O errors
4792 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4793
4794 // return buffer
4795 size_t actual_length = static_cast<size_t>(stream.gcount());
4796 assert(actual_length <= read_length);
4797
4798 *out_buffer = buffer.release();
4799 *out_size = actual_length * sizeof(T);
4800
4801 return status_ok;
4802}
4803
4804template <typename T>
4805PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc,
4806 std::basic_istream<T>& stream,
4807 unsigned int options,
4808 xml_encoding encoding,
4809 char_t** out_buffer) {
4810 void* buffer = 0;
4811 size_t size = 0;
4812 xml_parse_status status = status_ok;
4813
4814 // if stream has an error bit set, bail out (otherwise tellg() can fail and
4815 // we'll clear error bits)
4816 if (stream.fail()) return make_parse_result(status_io_error);
4817
4818 // load stream to memory (using seek-based implementation if possible, since
4819 // it's faster and takes less memory)
4820 if (stream.tellg() < 0) {
4821 stream.clear(); // clear error flags that could be set by a failing tellg
4822 status = load_stream_data_noseek(stream, &buffer, &size);
4823 } else
4824 status = load_stream_data_seek(stream, &buffer, &size);
4825
4826 if (status != status_ok) return make_parse_result(status);
4827
4828 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4829
4830 return load_buffer_impl(doc, doc, buffer,
4831 zero_terminate_buffer(buffer, size, real_encoding),
4832 options, real_encoding, true, true, out_buffer);
4833}
4834#endif
4835
4836#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || \
4837 (defined(__MINGW32__) && \
4838 (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
4839PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) {
4840 return _wfopen(path, mode);
4841}
4842#else
4843PUGI__FN char* convert_path_heap(const wchar_t* str) {
4844 assert(str);
4845
4846 // first pass: get length in utf8 characters
4847 size_t length = strlength_wide(str);
4848 size_t size = as_utf8_begin(str, length);
4849
4850 // allocate resulting string
4851 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
4852 if (!result) return 0;
4853
4854 // second pass: convert to utf8
4855 as_utf8_end(result, size, str, length);
4856
4857 // zero-terminate
4858 result[size] = 0;
4859
4860 return result;
4861}
4862
4863PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) {
4864 // there is no standard function to open wide paths, so our best bet is to try
4865 // utf8 path
4866 char* path_utf8 = convert_path_heap(path);
4867 if (!path_utf8) return 0;
4868
4869 // convert mode to ASCII (we mirror _wfopen interface)
4870 char mode_ascii[4] = {0};
4871 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
4872
4873 // try to open the utf8 path
4874 FILE* result = fopen(path_utf8, mode_ascii);
4875
4876 // free dummy buffer
4877 xml_memory::deallocate(path_utf8);
4878
4879 return result;
4880}
4881#endif
4882
4883PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file,
4884 const char_t* indent, unsigned int flags,
4885 xml_encoding encoding) {
4886 if (!file) return false;
4887
4888 xml_writer_file writer(file);
4889 doc.save(writer, indent, flags, encoding);
4890
4891 return ferror(file) == 0;
4892}
4893
4895 xml_node_struct* node;
4896 char_t* name;
4897
4898 name_null_sentry(xml_node_struct* node_) : node(node_), name(node_->name) {
4899 node->name = 0;
4900 }
4901
4902 ~name_null_sentry() { node->name = name; }
4903};
4904PUGI__NS_END
4905
4906namespace pugi {
4907PUGI__FN xml_writer_file::xml_writer_file(void* file_) : file(file_) {}
4908
4909PUGI__FN void xml_writer_file::write(const void* data, size_t size) {
4910 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
4911 (void)!result; // unfortunately we can't do proper error handling here
4912}
4913
4914#ifndef PUGIXML_NO_STL
4915PUGI__FN xml_writer_stream::xml_writer_stream(
4916 std::basic_ostream<char, std::char_traits<char> >& stream)
4917 : narrow_stream(&stream), wide_stream(0) {}
4918
4919PUGI__FN xml_writer_stream::xml_writer_stream(
4920 std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream)
4921 : narrow_stream(0), wide_stream(&stream) {}
4922
4923PUGI__FN void xml_writer_stream::write(const void* data, size_t size) {
4924 if (narrow_stream) {
4925 assert(!wide_stream);
4926 narrow_stream->write(reinterpret_cast<const char*>(data),
4927 static_cast<std::streamsize>(size));
4928 } else {
4929 assert(wide_stream);
4930 assert(size % sizeof(wchar_t) == 0);
4931
4932 wide_stream->write(reinterpret_cast<const wchar_t*>(data),
4933 static_cast<std::streamsize>(size / sizeof(wchar_t)));
4934 }
4935}
4936#endif
4937
4938PUGI__FN xml_tree_walker::xml_tree_walker() : _depth(0) {}
4939
4940PUGI__FN xml_tree_walker::~xml_tree_walker() {}
4941
4942PUGI__FN int xml_tree_walker::depth() const { return _depth; }
4943
4944PUGI__FN bool xml_tree_walker::begin(xml_node&) { return true; }
4945
4946PUGI__FN bool xml_tree_walker::end(xml_node&) { return true; }
4947
4948PUGI__FN xml_attribute::xml_attribute() : _attr(0) {}
4949
4950PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr)
4951 : _attr(attr) {}
4952
4953PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) {}
4954
4955PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const {
4956 return _attr ? unspecified_bool_xml_attribute : 0;
4957}
4958
4959PUGI__FN bool xml_attribute::operator!() const { return !_attr; }
4960
4961PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const {
4962 return (_attr == r._attr);
4963}
4964
4965PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const {
4966 return (_attr != r._attr);
4967}
4968
4969PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const {
4970 return (_attr < r._attr);
4971}
4972
4973PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const {
4974 return (_attr > r._attr);
4975}
4976
4977PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const {
4978 return (_attr <= r._attr);
4979}
4980
4981PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const {
4982 return (_attr >= r._attr);
4983}
4984
4985PUGI__FN xml_attribute xml_attribute::next_attribute() const {
4986 return _attr ? xml_attribute(_attr->next_attribute) : xml_attribute();
4987}
4988
4989PUGI__FN xml_attribute xml_attribute::previous_attribute() const {
4990 return _attr && _attr->prev_attribute_c->next_attribute
4991 ? xml_attribute(_attr->prev_attribute_c)
4992 : xml_attribute();
4993}
4994
4995PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const {
4996 return (_attr && _attr->value) ? _attr->value + 0 : def;
4997}
4998
4999PUGI__FN int xml_attribute::as_int(int def) const {
5000 return (_attr && _attr->value) ? impl::get_value_int(_attr->value) : def;
5001}
5002
5003PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const {
5004 return (_attr && _attr->value) ? impl::get_value_uint(_attr->value) : def;
5005}
5006
5007PUGI__FN double xml_attribute::as_double(double def) const {
5008 return (_attr && _attr->value) ? impl::get_value_double(_attr->value) : def;
5009}
5010
5011PUGI__FN float xml_attribute::as_float(float def) const {
5012 return (_attr && _attr->value) ? impl::get_value_float(_attr->value) : def;
5013}
5014
5015PUGI__FN bool xml_attribute::as_bool(bool def) const {
5016 return (_attr && _attr->value) ? impl::get_value_bool(_attr->value) : def;
5017}
5018
5019#ifdef PUGIXML_HAS_LONG_LONG
5020PUGI__FN long long xml_attribute::as_llong(long long def) const {
5021 return (_attr && _attr->value) ? impl::get_value_llong(_attr->value) : def;
5022}
5023
5024PUGI__FN unsigned long long xml_attribute::as_ullong(
5025 unsigned long long def) const {
5026 return (_attr && _attr->value) ? impl::get_value_ullong(_attr->value) : def;
5027}
5028#endif
5029
5030PUGI__FN bool xml_attribute::empty() const { return !_attr; }
5031
5032PUGI__FN const char_t* xml_attribute::name() const {
5033 return (_attr && _attr->name) ? _attr->name + 0 : PUGIXML_TEXT("");
5034}
5035
5036PUGI__FN const char_t* xml_attribute::value() const {
5037 return (_attr && _attr->value) ? _attr->value + 0 : PUGIXML_TEXT("");
5038}
5039
5040PUGI__FN size_t xml_attribute::hash_value() const {
5041 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) /
5042 sizeof(xml_attribute_struct));
5043}
5044
5045PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const {
5046 return _attr;
5047}
5048
5049PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) {
5050 set_value(rhs);
5051 return *this;
5052}
5053
5054PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) {
5055 set_value(rhs);
5056 return *this;
5057}
5058
5059PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) {
5060 set_value(rhs);
5061 return *this;
5062}
5063
5064PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) {
5065 set_value(rhs);
5066 return *this;
5067}
5068
5069PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) {
5070 set_value(rhs);
5071 return *this;
5072}
5073
5074PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) {
5075 set_value(rhs);
5076 return *this;
5077}
5078
5079PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) {
5080 set_value(rhs);
5081 return *this;
5082}
5083
5084PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) {
5085 set_value(rhs);
5086 return *this;
5087}
5088
5089#ifdef PUGIXML_HAS_LONG_LONG
5090PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) {
5091 set_value(rhs);
5092 return *this;
5093}
5094
5095PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) {
5096 set_value(rhs);
5097 return *this;
5098}
5099#endif
5100
5101PUGI__FN bool xml_attribute::set_name(const char_t* rhs) {
5102 if (!_attr) return false;
5103
5104 return impl::strcpy_insitu(_attr->name, _attr->header,
5105 impl::xml_memory_page_name_allocated_mask, rhs,
5106 impl::strlength(rhs));
5107}
5108
5109PUGI__FN bool xml_attribute::set_value(const char_t* rhs) {
5110 if (!_attr) return false;
5111
5112 return impl::strcpy_insitu(_attr->value, _attr->header,
5113 impl::xml_memory_page_value_allocated_mask, rhs,
5114 impl::strlength(rhs));
5115}
5116
5117PUGI__FN bool xml_attribute::set_value(int rhs) {
5118 if (!_attr) return false;
5119
5120 return impl::set_value_integer<unsigned int>(
5121 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5122 rhs, rhs < 0);
5123}
5124
5125PUGI__FN bool xml_attribute::set_value(unsigned int rhs) {
5126 if (!_attr) return false;
5127
5128 return impl::set_value_integer<unsigned int>(
5129 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5130 rhs, false);
5131}
5132
5133PUGI__FN bool xml_attribute::set_value(long rhs) {
5134 if (!_attr) return false;
5135
5136 return impl::set_value_integer<unsigned long>(
5137 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5138 rhs, rhs < 0);
5139}
5140
5141PUGI__FN bool xml_attribute::set_value(unsigned long rhs) {
5142 if (!_attr) return false;
5143
5144 return impl::set_value_integer<unsigned long>(
5145 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5146 rhs, false);
5147}
5148
5149PUGI__FN bool xml_attribute::set_value(double rhs) {
5150 if (!_attr) return false;
5151
5152 return impl::set_value_convert(_attr->value, _attr->header,
5153 impl::xml_memory_page_value_allocated_mask,
5154 rhs);
5155}
5156
5157PUGI__FN bool xml_attribute::set_value(float rhs) {
5158 if (!_attr) return false;
5159
5160 return impl::set_value_convert(_attr->value, _attr->header,
5161 impl::xml_memory_page_value_allocated_mask,
5162 rhs);
5163}
5164
5165PUGI__FN bool xml_attribute::set_value(bool rhs) {
5166 if (!_attr) return false;
5167
5168 return impl::set_value_bool(_attr->value, _attr->header,
5169 impl::xml_memory_page_value_allocated_mask, rhs);
5170}
5171
5172#ifdef PUGIXML_HAS_LONG_LONG
5173PUGI__FN bool xml_attribute::set_value(long long rhs) {
5174 if (!_attr) return false;
5175
5176 return impl::set_value_integer<unsigned long long>(
5177 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5178 rhs, rhs < 0);
5179}
5180
5181PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) {
5182 if (!_attr) return false;
5183
5184 return impl::set_value_integer<unsigned long long>(
5185 _attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask,
5186 rhs, false);
5187}
5188#endif
5189
5190#ifdef __BORLANDC__
5191PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) {
5192 return (bool)lhs && rhs;
5193}
5194
5195PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) {
5196 return (bool)lhs || rhs;
5197}
5198#endif
5199
5200PUGI__FN xml_node::xml_node() : _root(0) {}
5201
5202PUGI__FN xml_node::xml_node(xml_node_struct* p) : _root(p) {}
5203
5204PUGI__FN static void unspecified_bool_xml_node(xml_node***) {}
5205
5206PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const {
5207 return _root ? unspecified_bool_xml_node : 0;
5208}
5209
5210PUGI__FN bool xml_node::operator!() const { return !_root; }
5211
5212PUGI__FN xml_node::iterator xml_node::begin() const {
5213 return iterator(_root ? _root->first_child + 0 : 0, _root);
5214}
5215
5216PUGI__FN xml_node::iterator xml_node::end() const { return iterator(0, _root); }
5217
5218PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const {
5219 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5220}
5221
5222PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const {
5223 return attribute_iterator(0, _root);
5224}
5225
5226PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const {
5227 return xml_object_range<xml_node_iterator>(begin(), end());
5228}
5229
5230PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(
5231 const char_t* name_) const {
5232 return xml_object_range<xml_named_node_iterator>(
5233 xml_named_node_iterator(child(name_)._root, _root, name_),
5234 xml_named_node_iterator(0, _root, name_));
5235}
5236
5237PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const {
5238 return xml_object_range<xml_attribute_iterator>(attributes_begin(),
5239 attributes_end());
5240}
5241
5242PUGI__FN bool xml_node::operator==(const xml_node& r) const {
5243 return (_root == r._root);
5244}
5245
5246PUGI__FN bool xml_node::operator!=(const xml_node& r) const {
5247 return (_root != r._root);
5248}
5249
5250PUGI__FN bool xml_node::operator<(const xml_node& r) const {
5251 return (_root < r._root);
5252}
5253
5254PUGI__FN bool xml_node::operator>(const xml_node& r) const {
5255 return (_root > r._root);
5256}
5257
5258PUGI__FN bool xml_node::operator<=(const xml_node& r) const {
5259 return (_root <= r._root);
5260}
5261
5262PUGI__FN bool xml_node::operator>=(const xml_node& r) const {
5263 return (_root >= r._root);
5264}
5265
5266PUGI__FN bool xml_node::empty() const { return !_root; }
5267
5268PUGI__FN const char_t* xml_node::name() const {
5269 return (_root && _root->name) ? _root->name + 0 : PUGIXML_TEXT("");
5270}
5271
5272PUGI__FN xml_node_type xml_node::type() const {
5273 return _root ? PUGI__NODETYPE(_root) : node_null;
5274}
5275
5276PUGI__FN const char_t* xml_node::value() const {
5277 return (_root && _root->value) ? _root->value + 0 : PUGIXML_TEXT("");
5278}
5279
5280PUGI__FN xml_node xml_node::child(const char_t* name_) const {
5281 if (!_root) return xml_node();
5282
5283 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5284 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5285
5286 return xml_node();
5287}
5288
5289PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const {
5290 if (!_root) return xml_attribute();
5291
5292 for (xml_attribute_struct* i = _root->first_attribute; i;
5293 i = i->next_attribute)
5294 if (i->name && impl::strequal(name_, i->name)) return xml_attribute(i);
5295
5296 return xml_attribute();
5297}
5298
5299PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const {
5300 if (!_root) return xml_node();
5301
5302 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5303 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5304
5305 return xml_node();
5306}
5307
5308PUGI__FN xml_node xml_node::next_sibling() const {
5309 return _root ? xml_node(_root->next_sibling) : xml_node();
5310}
5311
5312PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const {
5313 if (!_root) return xml_node();
5314
5315 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling;
5316 i = i->prev_sibling_c)
5317 if (i->name && impl::strequal(name_, i->name)) return xml_node(i);
5318
5319 return xml_node();
5320}
5321
5322PUGI__FN xml_attribute xml_node::attribute(const char_t* name_,
5323 xml_attribute& hint_) const {
5324 xml_attribute_struct* hint = hint_._attr;
5325
5326 // if hint is not an attribute of node, behavior is not defined
5327 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5328
5329 if (!_root) return xml_attribute();
5330
5331 // optimistically search from hint up until the end
5332 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5333 if (i->name && impl::strequal(name_, i->name)) {
5334 // update hint to maximize efficiency of searching for consecutive
5335 // attributes
5336 hint_._attr = i->next_attribute;
5337
5338 return xml_attribute(i);
5339 }
5340
5341 // wrap around and search from the first attribute until the hint
5342 // 'j' null pointer check is technically redundant, but it prevents a crash in
5343 // case the assertion above fails
5344 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint;
5345 j = j->next_attribute)
5346 if (j->name && impl::strequal(name_, j->name)) {
5347 // update hint to maximize efficiency of searching for consecutive
5348 // attributes
5349 hint_._attr = j->next_attribute;
5350
5351 return xml_attribute(j);
5352 }
5353
5354 return xml_attribute();
5355}
5356
5357PUGI__FN xml_node xml_node::previous_sibling() const {
5358 if (!_root) return xml_node();
5359
5360 if (_root->prev_sibling_c->next_sibling)
5361 return xml_node(_root->prev_sibling_c);
5362 else
5363 return xml_node();
5364}
5365
5366PUGI__FN xml_node xml_node::parent() const {
5367 return _root ? xml_node(_root->parent) : xml_node();
5368}
5369
5370PUGI__FN xml_node xml_node::root() const {
5371 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5372}
5373
5374PUGI__FN xml_text xml_node::text() const { return xml_text(_root); }
5375
5376PUGI__FN const char_t* xml_node::child_value() const {
5377 if (!_root) return PUGIXML_TEXT("");
5378
5379 // element nodes can have value if parse_embed_pcdata was used
5380 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5381 return _root->value;
5382
5383 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5384 if (impl::is_text_node(i) && i->value) return i->value;
5385
5386 return PUGIXML_TEXT("");
5387}
5388
5389PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const {
5390 return child(name_).child_value();
5391}
5392
5393PUGI__FN xml_attribute xml_node::first_attribute() const {
5394 return _root ? xml_attribute(_root->first_attribute) : xml_attribute();
5395}
5396
5397PUGI__FN xml_attribute xml_node::last_attribute() const {
5398 return _root && _root->first_attribute
5399 ? xml_attribute(_root->first_attribute->prev_attribute_c)
5400 : xml_attribute();
5401}
5402
5403PUGI__FN xml_node xml_node::first_child() const {
5404 return _root ? xml_node(_root->first_child) : xml_node();
5405}
5406
5407PUGI__FN xml_node xml_node::last_child() const {
5408 return _root && _root->first_child
5409 ? xml_node(_root->first_child->prev_sibling_c)
5410 : xml_node();
5411}
5412
5413PUGI__FN bool xml_node::set_name(const char_t* rhs) {
5414 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5415
5416 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5417 return false;
5418
5419 return impl::strcpy_insitu(_root->name, _root->header,
5420 impl::xml_memory_page_name_allocated_mask, rhs,
5421 impl::strlength(rhs));
5422}
5423
5424PUGI__FN bool xml_node::set_value(const char_t* rhs) {
5425 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5426
5427 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment &&
5428 type_ != node_pi && type_ != node_doctype)
5429 return false;
5430
5431 return impl::strcpy_insitu(_root->value, _root->header,
5432 impl::xml_memory_page_value_allocated_mask, rhs,
5433 impl::strlength(rhs));
5434}
5435
5436PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) {
5437 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5438
5439 impl::xml_allocator& alloc = impl::get_allocator(_root);
5440 if (!alloc.reserve()) return xml_attribute();
5441
5442 xml_attribute a(impl::allocate_attribute(alloc));
5443 if (!a) return xml_attribute();
5444
5445 impl::append_attribute(a._attr, _root);
5446
5447 a.set_name(name_);
5448
5449 return a;
5450}
5451
5452PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) {
5453 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5454
5455 impl::xml_allocator& alloc = impl::get_allocator(_root);
5456 if (!alloc.reserve()) return xml_attribute();
5457
5458 xml_attribute a(impl::allocate_attribute(alloc));
5459 if (!a) return xml_attribute();
5460
5461 impl::prepend_attribute(a._attr, _root);
5462
5463 a.set_name(name_);
5464
5465 return a;
5466}
5467
5468PUGI__FN xml_attribute xml_node::insert_attribute_after(
5469 const char_t* name_, const xml_attribute& attr) {
5470 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5471 if (!attr || !impl::is_attribute_of(attr._attr, _root))
5472 return xml_attribute();
5473
5474 impl::xml_allocator& alloc = impl::get_allocator(_root);
5475 if (!alloc.reserve()) return xml_attribute();
5476
5477 xml_attribute a(impl::allocate_attribute(alloc));
5478 if (!a) return xml_attribute();
5479
5480 impl::insert_attribute_after(a._attr, attr._attr, _root);
5481
5482 a.set_name(name_);
5483
5484 return a;
5485}
5486
5487PUGI__FN xml_attribute xml_node::insert_attribute_before(
5488 const char_t* name_, const xml_attribute& attr) {
5489 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5490 if (!attr || !impl::is_attribute_of(attr._attr, _root))
5491 return xml_attribute();
5492
5493 impl::xml_allocator& alloc = impl::get_allocator(_root);
5494 if (!alloc.reserve()) return xml_attribute();
5495
5496 xml_attribute a(impl::allocate_attribute(alloc));
5497 if (!a) return xml_attribute();
5498
5499 impl::insert_attribute_before(a._attr, attr._attr, _root);
5500
5501 a.set_name(name_);
5502
5503 return a;
5504}
5505
5506PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) {
5507 if (!proto) return xml_attribute();
5508 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5509
5510 impl::xml_allocator& alloc = impl::get_allocator(_root);
5511 if (!alloc.reserve()) return xml_attribute();
5512
5513 xml_attribute a(impl::allocate_attribute(alloc));
5514 if (!a) return xml_attribute();
5515
5516 impl::append_attribute(a._attr, _root);
5517 impl::node_copy_attribute(a._attr, proto._attr);
5518
5519 return a;
5520}
5521
5522PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) {
5523 if (!proto) return xml_attribute();
5524 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5525
5526 impl::xml_allocator& alloc = impl::get_allocator(_root);
5527 if (!alloc.reserve()) return xml_attribute();
5528
5529 xml_attribute a(impl::allocate_attribute(alloc));
5530 if (!a) return xml_attribute();
5531
5532 impl::prepend_attribute(a._attr, _root);
5533 impl::node_copy_attribute(a._attr, proto._attr);
5534
5535 return a;
5536}
5537
5538PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto,
5539 const xml_attribute& attr) {
5540 if (!proto) return xml_attribute();
5541 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5542 if (!attr || !impl::is_attribute_of(attr._attr, _root))
5543 return xml_attribute();
5544
5545 impl::xml_allocator& alloc = impl::get_allocator(_root);
5546 if (!alloc.reserve()) return xml_attribute();
5547
5548 xml_attribute a(impl::allocate_attribute(alloc));
5549 if (!a) return xml_attribute();
5550
5551 impl::insert_attribute_after(a._attr, attr._attr, _root);
5552 impl::node_copy_attribute(a._attr, proto._attr);
5553
5554 return a;
5555}
5556
5557PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto,
5558 const xml_attribute& attr) {
5559 if (!proto) return xml_attribute();
5560 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5561 if (!attr || !impl::is_attribute_of(attr._attr, _root))
5562 return xml_attribute();
5563
5564 impl::xml_allocator& alloc = impl::get_allocator(_root);
5565 if (!alloc.reserve()) return xml_attribute();
5566
5567 xml_attribute a(impl::allocate_attribute(alloc));
5568 if (!a) return xml_attribute();
5569
5570 impl::insert_attribute_before(a._attr, attr._attr, _root);
5571 impl::node_copy_attribute(a._attr, proto._attr);
5572
5573 return a;
5574}
5575
5576PUGI__FN xml_node xml_node::append_child(xml_node_type type_) {
5577 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5578
5579 impl::xml_allocator& alloc = impl::get_allocator(_root);
5580 if (!alloc.reserve()) return xml_node();
5581
5582 xml_node n(impl::allocate_node(alloc, type_));
5583 if (!n) return xml_node();
5584
5585 impl::append_node(n._root, _root);
5586
5587 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5588
5589 return n;
5590}
5591
5592PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) {
5593 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5594
5595 impl::xml_allocator& alloc = impl::get_allocator(_root);
5596 if (!alloc.reserve()) return xml_node();
5597
5598 xml_node n(impl::allocate_node(alloc, type_));
5599 if (!n) return xml_node();
5600
5601 impl::prepend_node(n._root, _root);
5602
5603 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5604
5605 return n;
5606}
5607
5608PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_,
5609 const xml_node& node) {
5610 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5611 if (!node._root || node._root->parent != _root) return xml_node();
5612
5613 impl::xml_allocator& alloc = impl::get_allocator(_root);
5614 if (!alloc.reserve()) return xml_node();
5615
5616 xml_node n(impl::allocate_node(alloc, type_));
5617 if (!n) return xml_node();
5618
5619 impl::insert_node_before(n._root, node._root);
5620
5621 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5622
5623 return n;
5624}
5625
5626PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_,
5627 const xml_node& node) {
5628 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5629 if (!node._root || node._root->parent != _root) return xml_node();
5630
5631 impl::xml_allocator& alloc = impl::get_allocator(_root);
5632 if (!alloc.reserve()) return xml_node();
5633
5634 xml_node n(impl::allocate_node(alloc, type_));
5635 if (!n) return xml_node();
5636
5637 impl::insert_node_after(n._root, node._root);
5638
5639 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5640
5641 return n;
5642}
5643
5644PUGI__FN xml_node xml_node::append_child(const char_t* name_) {
5645 xml_node result = append_child(node_element);
5646
5647 result.set_name(name_);
5648
5649 return result;
5650}
5651
5652PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) {
5653 xml_node result = prepend_child(node_element);
5654
5655 result.set_name(name_);
5656
5657 return result;
5658}
5659
5660PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_,
5661 const xml_node& node) {
5662 xml_node result = insert_child_after(node_element, node);
5663
5664 result.set_name(name_);
5665
5666 return result;
5667}
5668
5669PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_,
5670 const xml_node& node) {
5671 xml_node result = insert_child_before(node_element, node);
5672
5673 result.set_name(name_);
5674
5675 return result;
5676}
5677
5678PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) {
5679 xml_node_type type_ = proto.type();
5680 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5681
5682 impl::xml_allocator& alloc = impl::get_allocator(_root);
5683 if (!alloc.reserve()) return xml_node();
5684
5685 xml_node n(impl::allocate_node(alloc, type_));
5686 if (!n) return xml_node();
5687
5688 impl::append_node(n._root, _root);
5689 impl::node_copy_tree(n._root, proto._root);
5690
5691 return n;
5692}
5693
5694PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) {
5695 xml_node_type type_ = proto.type();
5696 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5697
5698 impl::xml_allocator& alloc = impl::get_allocator(_root);
5699 if (!alloc.reserve()) return xml_node();
5700
5701 xml_node n(impl::allocate_node(alloc, type_));
5702 if (!n) return xml_node();
5703
5704 impl::prepend_node(n._root, _root);
5705 impl::node_copy_tree(n._root, proto._root);
5706
5707 return n;
5708}
5709
5710PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto,
5711 const xml_node& node) {
5712 xml_node_type type_ = proto.type();
5713 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5714 if (!node._root || node._root->parent != _root) return xml_node();
5715
5716 impl::xml_allocator& alloc = impl::get_allocator(_root);
5717 if (!alloc.reserve()) return xml_node();
5718
5719 xml_node n(impl::allocate_node(alloc, type_));
5720 if (!n) return xml_node();
5721
5722 impl::insert_node_after(n._root, node._root);
5723 impl::node_copy_tree(n._root, proto._root);
5724
5725 return n;
5726}
5727
5728PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto,
5729 const xml_node& node) {
5730 xml_node_type type_ = proto.type();
5731 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5732 if (!node._root || node._root->parent != _root) return xml_node();
5733
5734 impl::xml_allocator& alloc = impl::get_allocator(_root);
5735 if (!alloc.reserve()) return xml_node();
5736
5737 xml_node n(impl::allocate_node(alloc, type_));
5738 if (!n) return xml_node();
5739
5740 impl::insert_node_before(n._root, node._root);
5741 impl::node_copy_tree(n._root, proto._root);
5742
5743 return n;
5744}
5745
5746PUGI__FN xml_node xml_node::append_move(const xml_node& moved) {
5747 if (!impl::allow_move(*this, moved)) return xml_node();
5748
5749 impl::xml_allocator& alloc = impl::get_allocator(_root);
5750 if (!alloc.reserve()) return xml_node();
5751
5752 // disable document_buffer_order optimization since moving nodes around
5753 // changes document order without changing buffer pointers
5754 impl::get_document(_root).header |=
5755 impl::xml_memory_page_contents_shared_mask;
5756
5757 impl::remove_node(moved._root);
5758 impl::append_node(moved._root, _root);
5759
5760 return moved;
5761}
5762
5763PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) {
5764 if (!impl::allow_move(*this, moved)) return xml_node();
5765
5766 impl::xml_allocator& alloc = impl::get_allocator(_root);
5767 if (!alloc.reserve()) return xml_node();
5768
5769 // disable document_buffer_order optimization since moving nodes around
5770 // changes document order without changing buffer pointers
5771 impl::get_document(_root).header |=
5772 impl::xml_memory_page_contents_shared_mask;
5773
5774 impl::remove_node(moved._root);
5775 impl::prepend_node(moved._root, _root);
5776
5777 return moved;
5778}
5779
5780PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved,
5781 const xml_node& node) {
5782 if (!impl::allow_move(*this, moved)) return xml_node();
5783 if (!node._root || node._root->parent != _root) return xml_node();
5784 if (moved._root == node._root) return xml_node();
5785
5786 impl::xml_allocator& alloc = impl::get_allocator(_root);
5787 if (!alloc.reserve()) return xml_node();
5788
5789 // disable document_buffer_order optimization since moving nodes around
5790 // changes document order without changing buffer pointers
5791 impl::get_document(_root).header |=
5792 impl::xml_memory_page_contents_shared_mask;
5793
5794 impl::remove_node(moved._root);
5795 impl::insert_node_after(moved._root, node._root);
5796
5797 return moved;
5798}
5799
5800PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved,
5801 const xml_node& node) {
5802 if (!impl::allow_move(*this, moved)) return xml_node();
5803 if (!node._root || node._root->parent != _root) return xml_node();
5804 if (moved._root == node._root) return xml_node();
5805
5806 impl::xml_allocator& alloc = impl::get_allocator(_root);
5807 if (!alloc.reserve()) return xml_node();
5808
5809 // disable document_buffer_order optimization since moving nodes around
5810 // changes document order without changing buffer pointers
5811 impl::get_document(_root).header |=
5812 impl::xml_memory_page_contents_shared_mask;
5813
5814 impl::remove_node(moved._root);
5815 impl::insert_node_before(moved._root, node._root);
5816
5817 return moved;
5818}
5819
5820PUGI__FN bool xml_node::remove_attribute(const char_t* name_) {
5821 return remove_attribute(attribute(name_));
5822}
5823
5824PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) {
5825 if (!_root || !a._attr) return false;
5826 if (!impl::is_attribute_of(a._attr, _root)) return false;
5827
5828 impl::xml_allocator& alloc = impl::get_allocator(_root);
5829 if (!alloc.reserve()) return false;
5830
5831 impl::remove_attribute(a._attr, _root);
5832 impl::destroy_attribute(a._attr, alloc);
5833
5834 return true;
5835}
5836
5837PUGI__FN bool xml_node::remove_child(const char_t* name_) {
5838 return remove_child(child(name_));
5839}
5840
5841PUGI__FN bool xml_node::remove_child(const xml_node& n) {
5842 if (!_root || !n._root || n._root->parent != _root) return false;
5843
5844 impl::xml_allocator& alloc = impl::get_allocator(_root);
5845 if (!alloc.reserve()) return false;
5846
5847 impl::remove_node(n._root);
5848 impl::destroy_node(n._root, alloc);
5849
5850 return true;
5851}
5852
5853PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents,
5854 size_t size,
5855 unsigned int options,
5856 xml_encoding encoding) {
5857 // append_buffer is only valid for elements/documents
5858 if (!impl::allow_insert_child(type(), node_element))
5859 return impl::make_parse_result(status_append_invalid_root);
5860
5861 // get document node
5862 impl::xml_document_struct* doc = &impl::get_document(_root);
5863
5864 // disable document_buffer_order optimization since in a document with
5865 // multiple buffers comparing buffer pointers does not make sense
5866 doc->header |= impl::xml_memory_page_contents_shared_mask;
5867
5868 // get extra buffer element (we'll store the document fragment buffer there so
5869 // that we can deallocate it later)
5870 impl::xml_memory_page* page = 0;
5871 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(
5872 doc->allocate_memory(sizeof(impl::xml_extra_buffer), page));
5873 (void)page;
5874
5875 if (!extra) return impl::make_parse_result(status_out_of_memory);
5876
5877 // add extra buffer to the list
5878 extra->buffer = 0;
5879 extra->next = doc->extra_buffers;
5880 doc->extra_buffers = extra;
5881
5882 // name of the root has to be NULL before parsing - otherwise closing node
5883 // mismatches will not be detected at the top level
5884 impl::name_null_sentry sentry(_root);
5885
5886 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size,
5887 options, encoding, false, false,
5888 &extra->buffer);
5889}
5890
5891PUGI__FN xml_node
5892xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name,
5893 const char_t* attr_value) const {
5894 if (!_root) return xml_node();
5895
5896 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5897 if (i->name && impl::strequal(name_, i->name)) {
5898 for (xml_attribute_struct* a = i->first_attribute; a;
5899 a = a->next_attribute)
5900 if (a->name && impl::strequal(attr_name, a->name) &&
5901 impl::strequal(attr_value,
5902 a->value ? a->value + 0 : PUGIXML_TEXT("")))
5903 return xml_node(i);
5904 }
5905
5906 return xml_node();
5907}
5908
5909PUGI__FN xml_node xml_node::find_child_by_attribute(
5910 const char_t* attr_name, const char_t* attr_value) const {
5911 if (!_root) return xml_node();
5912
5913 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5914 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
5915 if (a->name && impl::strequal(attr_name, a->name) &&
5916 impl::strequal(attr_value,
5917 a->value ? a->value + 0 : PUGIXML_TEXT("")))
5918 return xml_node(i);
5919
5920 return xml_node();
5921}
5922
5923#ifndef PUGIXML_NO_STL
5924PUGI__FN string_t xml_node::path(char_t delimiter) const {
5925 if (!_root) return string_t();
5926
5927 size_t offset = 0;
5928
5929 for (xml_node_struct* i = _root; i; i = i->parent) {
5930 offset += (i != _root);
5931 offset += i->name ? impl::strlength(i->name) : 0;
5932 }
5933
5934 string_t result;
5935 result.resize(offset);
5936
5937 for (xml_node_struct* j = _root; j; j = j->parent) {
5938 if (j != _root) result[--offset] = delimiter;
5939
5940 if (j->name && *j->name) {
5941 size_t length = impl::strlength(j->name);
5942
5943 offset -= length;
5944 memcpy(&result[offset], j->name, length * sizeof(char_t));
5945 }
5946 }
5947
5948 assert(offset == 0);
5949
5950 return result;
5951}
5952#endif
5953
5954PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_,
5955 char_t delimiter) const {
5956 xml_node found = *this; // Current search context.
5957
5958 if (!_root || !path_ || !path_[0]) return found;
5959
5960 if (path_[0] == delimiter) {
5961 // Absolute path; e.g. '/foo/bar'
5962 found = found.root();
5963 ++path_;
5964 }
5965
5966 const char_t* path_segment = path_;
5967
5968 while (*path_segment == delimiter) ++path_segment;
5969
5970 const char_t* path_segment_end = path_segment;
5971
5972 while (*path_segment_end && *path_segment_end != delimiter)
5973 ++path_segment_end;
5974
5975 if (path_segment == path_segment_end) return found;
5976
5977 const char_t* next_segment = path_segment_end;
5978
5979 while (*next_segment == delimiter) ++next_segment;
5980
5981 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
5982 return found.first_element_by_path(next_segment, delimiter);
5983 else if (*path_segment == '.' && *(path_segment + 1) == '.' &&
5984 path_segment + 2 == path_segment_end)
5985 return found.parent().first_element_by_path(next_segment, delimiter);
5986 else {
5987 for (xml_node_struct* j = found._root->first_child; j;
5988 j = j->next_sibling) {
5989 if (j->name && impl::strequalrange(j->name, path_segment,
5990 static_cast<size_t>(path_segment_end -
5991 path_segment))) {
5992 xml_node subsearch =
5993 xml_node(j).first_element_by_path(next_segment, delimiter);
5994
5995 if (subsearch) return subsearch;
5996 }
5997 }
5998
5999 return xml_node();
6000 }
6001}
6002
6003PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) {
6004 walker._depth = -1;
6005
6006 xml_node arg_begin = *this;
6007 if (!walker.begin(arg_begin)) return false;
6008
6009 xml_node cur = first_child();
6010
6011 if (cur) {
6012 ++walker._depth;
6013
6014 do {
6015 xml_node arg_for_each = cur;
6016 if (!walker.for_each(arg_for_each)) return false;
6017
6018 if (cur.first_child()) {
6019 ++walker._depth;
6020 cur = cur.first_child();
6021 } else if (cur.next_sibling())
6022 cur = cur.next_sibling();
6023 else {
6024 // Borland C++ workaround
6025 while (!cur.next_sibling() && cur != *this && !cur.parent().empty()) {
6026 --walker._depth;
6027 cur = cur.parent();
6028 }
6029
6030 if (cur != *this) cur = cur.next_sibling();
6031 }
6032 } while (cur && cur != *this);
6033 }
6034
6035 assert(walker._depth == -1);
6036
6037 xml_node arg_end = *this;
6038 return walker.end(arg_end);
6039}
6040
6041PUGI__FN size_t xml_node::hash_value() const {
6042 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) /
6043 sizeof(xml_node_struct));
6044}
6045
6046PUGI__FN xml_node_struct* xml_node::internal_object() const { return _root; }
6047
6048PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent,
6049 unsigned int flags, xml_encoding encoding,
6050 unsigned int depth) const {
6051 if (!_root) return;
6052
6053 impl::xml_buffered_writer buffered_writer(writer, encoding);
6054
6055 impl::node_output(buffered_writer, _root, indent, flags, depth);
6056
6057 buffered_writer.flush();
6058}
6059
6060#ifndef PUGIXML_NO_STL
6061PUGI__FN void xml_node::print(
6062 std::basic_ostream<char, std::char_traits<char> >& stream,
6063 const char_t* indent, unsigned int flags, xml_encoding encoding,
6064 unsigned int depth) const {
6065 xml_writer_stream writer(stream);
6066
6067 print(writer, indent, flags, encoding, depth);
6068}
6069
6070PUGI__FN void xml_node::print(
6071 std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream,
6072 const char_t* indent, unsigned int flags, unsigned int depth) const {
6073 xml_writer_stream writer(stream);
6074
6075 print(writer, indent, flags, encoding_wchar, depth);
6076}
6077#endif
6078
6079PUGI__FN ptrdiff_t xml_node::offset_debug() const {
6080 if (!_root) return -1;
6081
6082 impl::xml_document_struct& doc = impl::get_document(_root);
6083
6084 // we can determine the offset reliably only if there is exactly once parse
6085 // buffer
6086 if (!doc.buffer || doc.extra_buffers) return -1;
6087
6088 switch (type()) {
6089 case node_document:
6090 return 0;
6091
6092 case node_element:
6093 case node_declaration:
6094 case node_pi:
6095 return _root->name &&
6096 (_root->header &
6097 impl::xml_memory_page_name_allocated_or_shared_mask) == 0
6098 ? _root->name - doc.buffer
6099 : -1;
6100
6101 case node_pcdata:
6102 case node_cdata:
6103 case node_comment:
6104 case node_doctype:
6105 return _root->value &&
6106 (_root->header &
6107 impl::xml_memory_page_value_allocated_or_shared_mask) == 0
6108 ? _root->value - doc.buffer
6109 : -1;
6110
6111 default:
6112 return -1;
6113 }
6114}
6115
6116#ifdef __BORLANDC__
6117PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) {
6118 return (bool)lhs && rhs;
6119}
6120
6121PUGI__FN bool operator||(const xml_node& lhs, bool rhs) {
6122 return (bool)lhs || rhs;
6123}
6124#endif
6125
6126PUGI__FN xml_text::xml_text(xml_node_struct* root) : _root(root) {}
6127
6128PUGI__FN xml_node_struct* xml_text::_data() const {
6129 if (!_root || impl::is_text_node(_root)) return _root;
6130
6131 // element nodes can have value if parse_embed_pcdata was used
6132 if (PUGI__NODETYPE(_root) == node_element && _root->value) return _root;
6133
6134 for (xml_node_struct* node = _root->first_child; node;
6135 node = node->next_sibling)
6136 if (impl::is_text_node(node)) return node;
6137
6138 return 0;
6139}
6140
6141PUGI__FN xml_node_struct* xml_text::_data_new() {
6142 xml_node_struct* d = _data();
6143 if (d) return d;
6144
6145 return xml_node(_root).append_child(node_pcdata).internal_object();
6146}
6147
6148PUGI__FN xml_text::xml_text() : _root(0) {}
6149
6150PUGI__FN static void unspecified_bool_xml_text(xml_text***) {}
6151
6152PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const {
6153 return _data() ? unspecified_bool_xml_text : 0;
6154}
6155
6156PUGI__FN bool xml_text::operator!() const { return !_data(); }
6157
6158PUGI__FN bool xml_text::empty() const { return _data() == 0; }
6159
6160PUGI__FN const char_t* xml_text::get() const {
6161 xml_node_struct* d = _data();
6162
6163 return (d && d->value) ? d->value + 0 : PUGIXML_TEXT("");
6164}
6165
6166PUGI__FN const char_t* xml_text::as_string(const char_t* def) const {
6167 xml_node_struct* d = _data();
6168
6169 return (d && d->value) ? d->value + 0 : def;
6170}
6171
6172PUGI__FN int xml_text::as_int(int def) const {
6173 xml_node_struct* d = _data();
6174
6175 return (d && d->value) ? impl::get_value_int(d->value) : def;
6176}
6177
6178PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const {
6179 xml_node_struct* d = _data();
6180
6181 return (d && d->value) ? impl::get_value_uint(d->value) : def;
6182}
6183
6184PUGI__FN double xml_text::as_double(double def) const {
6185 xml_node_struct* d = _data();
6186
6187 return (d && d->value) ? impl::get_value_double(d->value) : def;
6188}
6189
6190PUGI__FN float xml_text::as_float(float def) const {
6191 xml_node_struct* d = _data();
6192
6193 return (d && d->value) ? impl::get_value_float(d->value) : def;
6194}
6195
6196PUGI__FN bool xml_text::as_bool(bool def) const {
6197 xml_node_struct* d = _data();
6198
6199 return (d && d->value) ? impl::get_value_bool(d->value) : def;
6200}
6201
6202#ifdef PUGIXML_HAS_LONG_LONG
6203PUGI__FN long long xml_text::as_llong(long long def) const {
6204 xml_node_struct* d = _data();
6205
6206 return (d && d->value) ? impl::get_value_llong(d->value) : def;
6207}
6208
6209PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const {
6210 xml_node_struct* d = _data();
6211
6212 return (d && d->value) ? impl::get_value_ullong(d->value) : def;
6213}
6214#endif
6215
6216PUGI__FN bool xml_text::set(const char_t* rhs) {
6217 xml_node_struct* dn = _data_new();
6218
6219 return dn ? impl::strcpy_insitu(dn->value, dn->header,
6220 impl::xml_memory_page_value_allocated_mask,
6221 rhs, impl::strlength(rhs))
6222 : false;
6223}
6224
6225PUGI__FN bool xml_text::set(int rhs) {
6226 xml_node_struct* dn = _data_new();
6227
6228 return dn ? impl::set_value_integer<unsigned int>(
6229 dn->value, dn->header,
6230 impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0)
6231 : false;
6232}
6233
6234PUGI__FN bool xml_text::set(unsigned int rhs) {
6235 xml_node_struct* dn = _data_new();
6236
6237 return dn ? impl::set_value_integer<unsigned int>(
6238 dn->value, dn->header,
6239 impl::xml_memory_page_value_allocated_mask, rhs, false)
6240 : false;
6241}
6242
6243PUGI__FN bool xml_text::set(long rhs) {
6244 xml_node_struct* dn = _data_new();
6245
6246 return dn ? impl::set_value_integer<unsigned long>(
6247 dn->value, dn->header,
6248 impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0)
6249 : false;
6250}
6251
6252PUGI__FN bool xml_text::set(unsigned long rhs) {
6253 xml_node_struct* dn = _data_new();
6254
6255 return dn ? impl::set_value_integer<unsigned long>(
6256 dn->value, dn->header,
6257 impl::xml_memory_page_value_allocated_mask, rhs, false)
6258 : false;
6259}
6260
6261PUGI__FN bool xml_text::set(float rhs) {
6262 xml_node_struct* dn = _data_new();
6263
6264 return dn ? impl::set_value_convert(
6265 dn->value, dn->header,
6266 impl::xml_memory_page_value_allocated_mask, rhs)
6267 : false;
6268}
6269
6270PUGI__FN bool xml_text::set(double rhs) {
6271 xml_node_struct* dn = _data_new();
6272
6273 return dn ? impl::set_value_convert(
6274 dn->value, dn->header,
6275 impl::xml_memory_page_value_allocated_mask, rhs)
6276 : false;
6277}
6278
6279PUGI__FN bool xml_text::set(bool rhs) {
6280 xml_node_struct* dn = _data_new();
6281
6282 return dn ? impl::set_value_bool(dn->value, dn->header,
6283 impl::xml_memory_page_value_allocated_mask,
6284 rhs)
6285 : false;
6286}
6287
6288#ifdef PUGIXML_HAS_LONG_LONG
6289PUGI__FN bool xml_text::set(long long rhs) {
6290 xml_node_struct* dn = _data_new();
6291
6292 return dn ? impl::set_value_integer<unsigned long long>(
6293 dn->value, dn->header,
6294 impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0)
6295 : false;
6296}
6297
6298PUGI__FN bool xml_text::set(unsigned long long rhs) {
6299 xml_node_struct* dn = _data_new();
6300
6301 return dn ? impl::set_value_integer<unsigned long long>(
6302 dn->value, dn->header,
6303 impl::xml_memory_page_value_allocated_mask, rhs, false)
6304 : false;
6305}
6306#endif
6307
6308PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) {
6309 set(rhs);
6310 return *this;
6311}
6312
6313PUGI__FN xml_text& xml_text::operator=(int rhs) {
6314 set(rhs);
6315 return *this;
6316}
6317
6318PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) {
6319 set(rhs);
6320 return *this;
6321}
6322
6323PUGI__FN xml_text& xml_text::operator=(long rhs) {
6324 set(rhs);
6325 return *this;
6326}
6327
6328PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) {
6329 set(rhs);
6330 return *this;
6331}
6332
6333PUGI__FN xml_text& xml_text::operator=(double rhs) {
6334 set(rhs);
6335 return *this;
6336}
6337
6338PUGI__FN xml_text& xml_text::operator=(float rhs) {
6339 set(rhs);
6340 return *this;
6341}
6342
6343PUGI__FN xml_text& xml_text::operator=(bool rhs) {
6344 set(rhs);
6345 return *this;
6346}
6347
6348#ifdef PUGIXML_HAS_LONG_LONG
6349PUGI__FN xml_text& xml_text::operator=(long long rhs) {
6350 set(rhs);
6351 return *this;
6352}
6353
6354PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) {
6355 set(rhs);
6356 return *this;
6357}
6358#endif
6359
6360PUGI__FN xml_node xml_text::data() const { return xml_node(_data()); }
6361
6362#ifdef __BORLANDC__
6363PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) {
6364 return (bool)lhs && rhs;
6365}
6366
6367PUGI__FN bool operator||(const xml_text& lhs, bool rhs) {
6368 return (bool)lhs || rhs;
6369}
6370#endif
6371
6372PUGI__FN xml_node_iterator::xml_node_iterator() {}
6373
6374PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node)
6375 : _wrap(node), _parent(node.parent()) {}
6376
6377PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref,
6378 xml_node_struct* parent)
6379 : _wrap(ref), _parent(parent) {}
6380
6381PUGI__FN bool xml_node_iterator::operator==(
6382 const xml_node_iterator& rhs) const {
6383 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6384}
6385
6386PUGI__FN bool xml_node_iterator::operator!=(
6387 const xml_node_iterator& rhs) const {
6388 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6389}
6390
6391PUGI__FN xml_node& xml_node_iterator::operator*() const {
6392 assert(_wrap._root);
6393 return _wrap;
6394}
6395
6396PUGI__FN xml_node* xml_node_iterator::operator->() const {
6397 assert(_wrap._root);
6398 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6399}
6400
6401PUGI__FN const xml_node_iterator& xml_node_iterator::operator++() {
6402 assert(_wrap._root);
6403 _wrap._root = _wrap._root->next_sibling;
6404 return *this;
6405}
6406
6407PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) {
6408 xml_node_iterator temp = *this;
6409 ++*this;
6410 return temp;
6411}
6412
6413PUGI__FN const xml_node_iterator& xml_node_iterator::operator--() {
6414 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6415 return *this;
6416}
6417
6418PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) {
6419 xml_node_iterator temp = *this;
6420 --*this;
6421 return temp;
6422}
6423
6424PUGI__FN xml_attribute_iterator::xml_attribute_iterator() {}
6425
6426PUGI__FN xml_attribute_iterator::xml_attribute_iterator(
6427 const xml_attribute& attr, const xml_node& parent)
6428 : _wrap(attr), _parent(parent) {}
6429
6430PUGI__FN xml_attribute_iterator::xml_attribute_iterator(
6431 xml_attribute_struct* ref, xml_node_struct* parent)
6432 : _wrap(ref), _parent(parent) {}
6433
6434PUGI__FN bool xml_attribute_iterator::operator==(
6435 const xml_attribute_iterator& rhs) const {
6436 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6437}
6438
6439PUGI__FN bool xml_attribute_iterator::operator!=(
6440 const xml_attribute_iterator& rhs) const {
6441 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6442}
6443
6444PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const {
6445 assert(_wrap._attr);
6446 return _wrap;
6447}
6448
6449PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const {
6450 assert(_wrap._attr);
6451 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6452}
6453
6454PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator++() {
6455 assert(_wrap._attr);
6456 _wrap._attr = _wrap._attr->next_attribute;
6457 return *this;
6458}
6459
6460PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) {
6461 xml_attribute_iterator temp = *this;
6462 ++*this;
6463 return temp;
6464}
6465
6466PUGI__FN const xml_attribute_iterator& xml_attribute_iterator::operator--() {
6467 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6468 return *this;
6469}
6470
6471PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) {
6472 xml_attribute_iterator temp = *this;
6473 --*this;
6474 return temp;
6475}
6476
6477PUGI__FN xml_named_node_iterator::xml_named_node_iterator() : _name(0) {}
6478
6479PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node,
6480 const char_t* name)
6481 : _wrap(node), _parent(node.parent()), _name(name) {}
6482
6483PUGI__FN xml_named_node_iterator::xml_named_node_iterator(
6484 xml_node_struct* ref, xml_node_struct* parent, const char_t* name)
6485 : _wrap(ref), _parent(parent), _name(name) {}
6486
6487PUGI__FN bool xml_named_node_iterator::operator==(
6488 const xml_named_node_iterator& rhs) const {
6489 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6490}
6491
6492PUGI__FN bool xml_named_node_iterator::operator!=(
6493 const xml_named_node_iterator& rhs) const {
6494 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6495}
6496
6497PUGI__FN xml_node& xml_named_node_iterator::operator*() const {
6498 assert(_wrap._root);
6499 return _wrap;
6500}
6501
6502PUGI__FN xml_node* xml_named_node_iterator::operator->() const {
6503 assert(_wrap._root);
6504 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6505}
6506
6507PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator++() {
6508 assert(_wrap._root);
6509 _wrap = _wrap.next_sibling(_name);
6510 return *this;
6511}
6512
6513PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) {
6514 xml_named_node_iterator temp = *this;
6515 ++*this;
6516 return temp;
6517}
6518
6519PUGI__FN const xml_named_node_iterator& xml_named_node_iterator::operator--() {
6520 if (_wrap._root)
6521 _wrap = _wrap.previous_sibling(_name);
6522 else {
6523 _wrap = _parent.last_child();
6524
6525 if (!impl::strequal(_wrap.name(), _name))
6526 _wrap = _wrap.previous_sibling(_name);
6527 }
6528
6529 return *this;
6530}
6531
6532PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) {
6533 xml_named_node_iterator temp = *this;
6534 --*this;
6535 return temp;
6536}
6537
6538PUGI__FN xml_parse_result::xml_parse_result()
6539 : status(status_internal_error), offset(0), encoding(encoding_auto) {}
6540
6541PUGI__FN xml_parse_result::operator bool() const { return status == status_ok; }
6542
6543PUGI__FN const char* xml_parse_result::description() const {
6544 switch (status) {
6545 case status_ok:
6546 return "No error";
6547
6548 case status_file_not_found:
6549 return "File was not found";
6550 case status_io_error:
6551 return "Error reading from file/stream";
6552 case status_out_of_memory:
6553 return "Could not allocate memory";
6554 case status_internal_error:
6555 return "Internal error occurred";
6556
6557 case status_unrecognized_tag:
6558 return "Could not determine tag type";
6559
6560 case status_bad_pi:
6561 return "Error parsing document declaration/processing instruction";
6562 case status_bad_comment:
6563 return "Error parsing comment";
6564 case status_bad_cdata:
6565 return "Error parsing CDATA section";
6566 case status_bad_doctype:
6567 return "Error parsing document type declaration";
6568 case status_bad_pcdata:
6569 return "Error parsing PCDATA section";
6570 case status_bad_start_element:
6571 return "Error parsing start element tag";
6572 case status_bad_attribute:
6573 return "Error parsing element attribute";
6574 case status_bad_end_element:
6575 return "Error parsing end element tag";
6576 case status_end_element_mismatch:
6577 return "Start-end tags mismatch";
6578
6579 case status_append_invalid_root:
6580 return "Unable to append nodes: root is not an element or document";
6581
6582 case status_no_document_element:
6583 return "No document element found";
6584
6585 default:
6586 return "Unknown error";
6587 }
6588}
6589
6590PUGI__FN xml_document::xml_document() : _buffer(0) { _create(); }
6591
6592PUGI__FN xml_document::~xml_document() { _destroy(); }
6593
6594PUGI__FN void xml_document::reset() {
6595 _destroy();
6596 _create();
6597}
6598
6599PUGI__FN void xml_document::reset(const xml_document& proto) {
6600 reset();
6601
6602 for (xml_node cur = proto.first_child(); cur; cur = cur.next_sibling())
6603 append_copy(cur);
6604}
6605
// Builds the initial document structure inside the document's own `_memory`
// buffer: a memory page header is constructed at the start of `_memory` and the
// xml_document_struct root is placement-constructed right after it (preceded by
// a uint32_t marker slot when PUGIXML_COMPACT is defined).
// Precondition: `_root` is null (asserted).
6606PUGI__FN void xml_document::_create() {
6607 assert(!_root);
6608
// extra bytes reserved between the page header and the root; only the compact
// build needs room for the page marker
6609#ifdef PUGIXML_COMPACT
6610 const size_t page_offset = sizeof(uint32_t);
6611#else
6612 const size_t page_offset = 0;
6613#endif
6614
6615 // initialize sentinel page
// compile-time check that page header + root struct + marker fit in `_memory`
6616 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) +
6617 sizeof(impl::xml_document_struct) + page_offset <=
6618 sizeof(_memory));
6619
6620 // prepare page structure
6621 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
6622 assert(page);
6623
// NOTE(review): presumably marks the sentinel page as fully used so that no
// further allocations are served from it — confirm against xml_allocator
6624 page->busy_size = impl::xml_memory_page_size;
6625
6626 // setup first page marker
6627#ifdef PUGIXML_COMPACT
6628 // round-trip through void* to avoid 'cast increases required alignment of
6629 // target type' warning
6630 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(
6631 reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
6632 *page->compact_page_marker = sizeof(impl::xml_memory_page);
6633#endif
6634
6635 // allocate new root
// placement new: the root lives directly after the page header (+ marker slot)
6636 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) +
6637 page_offset) impl::xml_document_struct(page);
// the root's previous-sibling link points back at the root itself
6638 _root->prev_sibling_c = _root;
6639
6640 // setup sentinel page
6641 page->allocator = static_cast<impl::xml_document_struct*>(_root);
6642
6643 // setup hash table pointer in allocator
6644#ifdef PUGIXML_COMPACT
6645 page->allocator->_hash =
6646 &static_cast<impl::xml_document_struct*>(_root)->hash;
6647#endif
6648
6649 // verify the document allocation
// runtime counterpart of the static assert above: the root must end inside
// `_memory`
6650 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <=
6651 _memory + sizeof(_memory));
6652}
6653
6654PUGI__FN void xml_document::_destroy() {
6655 assert(_root);
6656
6657 // destroy static storage
6658 if (_buffer) {
6659 impl::xml_memory::deallocate(_buffer);
6660 _buffer = 0;
6661 }
6662
6663 // destroy extra buffers (note: no need to destroy linked list nodes, they're
6664 // allocated using document allocator)
6665 for (impl::xml_extra_buffer* extra =
6666 static_cast<impl::xml_document_struct*>(_root)->extra_buffers;
6667 extra; extra = extra->next) {
6668 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
6669 }
6670
6671 // destroy dynamic storage, leave sentinel page (it's in static memory)
6672 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
6673 assert(root_page && !root_page->prev);
6674 assert(reinterpret_cast<char*>(root_page) >= _memory &&
6675 reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
6676
6677 for (impl::xml_memory_page* page = root_page->next; page;) {
6678 impl::xml_memory_page* next = page->next;
6679
6680 impl::xml_allocator::deallocate_page(page);
6681
6682 page = next;
6683 }
6684
6685#ifdef PUGIXML_COMPACT
6686 // destroy hash table
6687 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
6688#endif
6689
6690 _root = 0;
6691}
6692
6693#ifndef PUGIXML_NO_STL
6694PUGI__FN xml_parse_result
6695xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream,
6696 unsigned int options, xml_encoding encoding) {
6697 reset();
6698
6699 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root),
6700 stream, options, encoding, &_buffer);
6701}
6702
6703PUGI__FN xml_parse_result xml_document::load(
6704 std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream,
6705 unsigned int options) {
6706 reset();
6707
6708 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root),
6709 stream, options, encoding_wchar, &_buffer);
6710}
6711#endif
6712
6713PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents,
6714 unsigned int options) {
6715 // Force native encoding (skip autodetection)
6716#ifdef PUGIXML_WCHAR_MODE
6717 xml_encoding encoding = encoding_wchar;
6718#else
6719 xml_encoding encoding = encoding_utf8;
6720#endif
6721
6722 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t),
6723 options, encoding);
6724}
6725
6726PUGI__FN xml_parse_result xml_document::load(const char_t* contents,
6727 unsigned int options) {
6728 return load_string(contents, options);
6729}
6730
6731PUGI__FN xml_parse_result xml_document::load_file(const char* path_,
6732 unsigned int options,
6733 xml_encoding encoding) {
6734 reset();
6735
6736 using impl::auto_deleter; // MSVC7 workaround
6737 auto_deleter<FILE> file(fopen(path_, "rb"), impl::close_file);
6738
6739 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root),
6740 file.data, options, encoding, &_buffer);
6741}
6742
6743PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_,
6744 unsigned int options,
6745 xml_encoding encoding) {
6746 reset();
6747
6748 using impl::auto_deleter; // MSVC7 workaround
6749 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
6750
6751 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root),
6752 file.data, options, encoding, &_buffer);
6753}
6754
6755PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents,
6756 size_t size,
6757 unsigned int options,
6758 xml_encoding encoding) {
6759 reset();
6760
6761 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root),
6762 _root, const_cast<void*>(contents), size,
6763 options, encoding, false, false, &_buffer);
6764}
6765
6766PUGI__FN xml_parse_result xml_document::load_buffer_inplace(
6767 void* contents, size_t size, unsigned int options, xml_encoding encoding) {
6768 reset();
6769
6770 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root),
6771 _root, contents, size, options, encoding, true,
6772 false, &_buffer);
6773}
6774
6775PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(
6776 void* contents, size_t size, unsigned int options, xml_encoding encoding) {
6777 reset();
6778
6779 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root),
6780 _root, contents, size, options, encoding, true,
6781 true, &_buffer);
6782}
6783
6784PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent,
6785 unsigned int flags,
6786 xml_encoding encoding) const {
6787 impl::xml_buffered_writer buffered_writer(writer, encoding);
6788
6789 if ((flags & format_write_bom) && encoding != encoding_latin1) {
6790 // BOM always represents the codepoint U+FEFF, so just write it in native
6791 // encoding
6792#ifdef PUGIXML_WCHAR_MODE
6793 unsigned int bom = 0xfeff;
6794 buffered_writer.write(static_cast<wchar_t>(bom));
6795#else
6796 buffered_writer.write('\xef', '\xbb', '\xbf');
6797#endif
6798 }
6799
6800 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) {
6801 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
6802 if (encoding == encoding_latin1)
6803 buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
6804 buffered_writer.write('?', '>');
6805 if (!(flags & format_raw)) buffered_writer.write('\n');
6806 }
6807
6808 impl::node_output(buffered_writer, _root, indent, flags, 0);
6809
6810 buffered_writer.flush();
6811}
6812
6813#ifndef PUGIXML_NO_STL
6814PUGI__FN void xml_document::save(
6815 std::basic_ostream<char, std::char_traits<char> >& stream,
6816 const char_t* indent, unsigned int flags, xml_encoding encoding) const {
6817 xml_writer_stream writer(stream);
6818
6819 save(writer, indent, flags, encoding);
6820}
6821
6822PUGI__FN void xml_document::save(
6823 std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream,
6824 const char_t* indent, unsigned int flags) const {
6825 xml_writer_stream writer(stream);
6826
6827 save(writer, indent, flags, encoding_wchar);
6828}
6829#endif
6830
6831PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent,
6832 unsigned int flags,
6833 xml_encoding encoding) const {
6834 using impl::auto_deleter; // MSVC7 workaround
6835 auto_deleter<FILE> file(
6836 fopen(path_, (flags & format_save_file_text) ? "w" : "wb"),
6837 impl::close_file);
6838
6839 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
6840}
6841
6842PUGI__FN bool xml_document::save_file(const wchar_t* path_,
6843 const char_t* indent, unsigned int flags,
6844 xml_encoding encoding) const {
6845 using impl::auto_deleter; // MSVC7 workaround
6846 auto_deleter<FILE> file(
6847 impl::open_file_wide(path_,
6848 (flags & format_save_file_text) ? L"w" : L"wb"),
6849 impl::close_file);
6850
6851 return impl::save_file_impl(*this, file.data, indent, flags, encoding);
6852}
6853
6854PUGI__FN xml_node xml_document::document_element() const {
6855 assert(_root);
6856
6857 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6858 if (PUGI__NODETYPE(i) == node_element) return xml_node(i);
6859
6860 return xml_node();
6861}
6862
6863#ifndef PUGIXML_NO_STL
6864PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) {
6865 assert(str);
6866
6867 return impl::as_utf8_impl(str, impl::strlength_wide(str));
6868}
6869
6870PUGI__FN std::string PUGIXML_FUNCTION
6871as_utf8(const std::basic_string<wchar_t>& str) {
6872 return impl::as_utf8_impl(str.c_str(), str.size());
6873}
6874
6875PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) {
6876 assert(str);
6877
6878 return impl::as_wide_impl(str, strlen(str));
6879}
6880
6881PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION
6882as_wide(const std::string& str) {
6883 return impl::as_wide_impl(str.c_str(), str.size());
6884}
6885#endif
6886
6887PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(
6888 allocation_function allocate, deallocation_function deallocate) {
6889 impl::xml_memory::allocate = allocate;
6890 impl::xml_memory::deallocate = deallocate;
6891}
6892
6893PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() {
6894 return impl::xml_memory::allocate;
6895}
6896
6897PUGI__FN deallocation_function PUGIXML_FUNCTION
6898get_memory_deallocation_function() {
6899 return impl::xml_memory::deallocate;
6900}
6901} // namespace pugi
6902
6903#if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
6904namespace std {
6905// Workarounds for (non-standard) iterator category detection for older versions
6906// (MSVC7/IC8 and earlier)
6907PUGI__FN std::bidirectional_iterator_tag _Iter_cat(
6908 const pugi::xml_node_iterator&) {
6909 return std::bidirectional_iterator_tag();
6910}
6911
6912PUGI__FN std::bidirectional_iterator_tag _Iter_cat(
6914 return std::bidirectional_iterator_tag();
6915}
6916
6917PUGI__FN std::bidirectional_iterator_tag _Iter_cat(
6919 return std::bidirectional_iterator_tag();
6920}
6921} // namespace std
6922#endif
6923
6924#if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
6925namespace std {
6926// Workarounds for (non-standard) iterator category detection
6927PUGI__FN std::bidirectional_iterator_tag __iterator_category(
6928 const pugi::xml_node_iterator&) {
6929 return std::bidirectional_iterator_tag();
6930}
6931
6932PUGI__FN std::bidirectional_iterator_tag __iterator_category(
6934 return std::bidirectional_iterator_tag();
6935}
6936
6937PUGI__FN std::bidirectional_iterator_tag __iterator_category(
6939 return std::bidirectional_iterator_tag();
6940}
6941} // namespace std
6942#endif
6943
6944#ifndef PUGIXML_NO_XPATH
6945// STL replacements
6946PUGI__NS_BEGIN
// Function object that compares two values of the same type with operator==.
struct equal_to {
  template <typename T>
  bool operator()(const T& left, const T& right) const {
    return left == right;
  }
};
6953
// Function object that compares two values of the same type with operator!=.
// The `struct not_equal_to {` header line was missing and is restored here.
struct not_equal_to {
  template <typename T>
  bool operator()(const T& lhs, const T& rhs) const {
    return lhs != rhs;
  }
};
6960
// Function object that orders two values of the same type with operator<.
struct less {
  template <typename T>
  bool operator()(const T& left, const T& right) const {
    return left < right;
  }
};
6967
// Function object that orders two values of the same type with operator<=.
// The `struct less_equal {` header line was missing and is restored here.
struct less_equal {
  template <typename T>
  bool operator()(const T& lhs, const T& rhs) const {
    return lhs <= rhs;
  }
};
6974
// Exchanges two values through a temporary copy (copy construction plus two
// assignments).
template <typename T>
void swap(T& lhs, T& rhs) {
  T saved = lhs;

  lhs = rhs;
  rhs = saved;
}
6981
// Returns an iterator to the first element for which no other element compares
// smaller under `pred` (pred(a, b) means "a goes before b").
// For an empty range `end` is returned; previously an empty range stepped past
// `end` and read out of bounds.
template <typename I, typename Pred>
I min_element(I begin, I end, const Pred& pred) {
  // empty range: nothing to select
  if (!(begin != end)) return end;

  I result = begin;

  for (I it = begin + 1; it != end; ++it)
    if (pred(*it, *result)) result = it;

  return result;
}
6991
// Reverses [begin, end) in place by swapping the outermost pair and moving
// inwards until fewer than two elements remain.
template <typename I>
void reverse(I begin, I end) {
  for (; end - begin > 1;) {
    swap(*begin++, *--end);
  }
}
6996
// Collapses runs of adjacent equal elements (compared with operator!=) in
// [begin, end) and returns the new past-the-end iterator.
template <typename I>
I unique(I begin, I end) {
  // fast path: skip the prefix that has no adjacent duplicates
  for (; end - begin > 1; begin++) {
    if (!(*begin != *(begin + 1))) break;
  }

  if (begin == end) return begin;

  // position of the last element that was kept
  I last_kept = begin++;

  // compact the rest, keeping an element only if it differs from the last kept
  for (; begin != end; begin++) {
    if (*begin != *last_kept) *++last_kept = *begin;
  }

  // last_kept points at a live element, so the end is one past it
  return last_kept + 1;
}
7018
// Copies [begin, end) so that the copy ends at `target`, working from the last
// element to the first.
template <typename I>
void copy_backwards(I begin, I end, I target) {
  while (begin != end) {
    --target;
    --end;
    *target = *end;
  }
}
7023
// Insertion sort of the non-empty range [begin, end) under `pred`. The unnamed
// T* parameter only serves to deduce the element type.
template <typename I, typename Pred, typename T>
void insertion_sort(I begin, I end, const Pred& pred, T*) {
  assert(begin != end);

  for (I cursor = begin + 1; cursor != end; ++cursor) {
    T pending = *cursor;

    if (pred(pending, *begin)) {
      // new minimum: shift the whole sorted prefix right and put it in front
      copy_backwards(begin, cursor, cursor + 1);
      *begin = pending;
      continue;
    }

    // *begin is not greater than pending, so the scan stops before the front
    I slot = cursor;

    for (; pred(pending, *(slot - 1)); slot--) {
      *slot = *(slot - 1);
    }

    *slot = pending;
  }
}
7049
// Three-way partition of [begin, end) around the element at `middle`.
// On return *out_eqbeg and *out_eqend delimit the run of elements equal to the
// pivot. `pred` is the ordering predicate; equality uses operator==.
7050// std variant for elements with ==
7051template <typename I, typename Pred>
7052void partition(I begin, I middle, I end, const Pred& pred, I* out_eqbeg,
7053 I* out_eqend) {
// [eqbeg, eqend) is the current run of pivot-equal elements
7054 I eqbeg = middle, eqend = middle + 1;
7055
7056 // expand equal range
7057 while (eqbeg != begin && *(eqbeg - 1) == *eqbeg) --eqbeg;
7058 while (eqend != end && *eqend == *eqbeg) ++eqend;
7059
7060 // process outer elements
// ltend scans leftwards from the equal run, gtbeg scans rightwards from it
7061 I ltend = eqbeg, gtbeg = eqend;
7062
7063 for (;;) {
7064 // find the element from the right side that belongs to the left one
// pivot-equal elements met on the way are absorbed into the equal run
7065 for (; gtbeg != end; ++gtbeg)
7066 if (!pred(*eqbeg, *gtbeg)) {
7067 if (*gtbeg == *eqbeg)
7068 swap(*gtbeg, *eqend++);
7069 else
7070 break;
7071 }
7072
7073 // find the element from the left side that belongs to the right one
// pivot-equal elements met on the way are absorbed into the equal run
7074 for (; ltend != begin; --ltend)
7075 if (!pred(*(ltend - 1), *eqbeg)) {
7076 if (*eqbeg == *(ltend - 1))
7077 swap(*(ltend - 1), *--eqbeg);
7078 else
7079 break;
7080 }
7081
7082 // scanned all elements
7083 if (gtbeg == end && ltend == begin) {
7084 *out_eqbeg = eqbeg;
7085 *out_eqend = eqend;
7086 return;
7087 }
7088
7089 // make room for elements by moving equal area
// only one side has a misplaced element: shift the equal run by one position;
// otherwise both sides have one and they are swapped with each other
7090 if (gtbeg == end) {
7091 if (--ltend != --eqbeg) swap(*ltend, *eqbeg);
7092 swap(*eqbeg, *--eqend);
7093 } else if (ltend == begin) {
7094 if (eqend != gtbeg) swap(*eqbeg, *eqend);
7095 ++eqend;
7096 swap(*gtbeg++, *eqbeg++);
7097 } else
7098 swap(*gtbeg++, *--ltend);
7099 }
7100}
7101
// Orders the three referenced elements under `pred` with at most three swaps,
// so that the median ends up at `middle`.
template <typename I, typename Pred>
void median3(I first, I middle, I last, const Pred& pred) {
  // order the first pair
  if (pred(*middle, *first)) {
    swap(*middle, *first);
  }

  // move the largest of the three to `last`
  if (pred(*last, *middle)) {
    swap(*last, *middle);
  }

  // the previous swap may have broken the order of the first pair
  if (pred(*middle, *first)) {
    swap(*middle, *first);
  }
}
7108
// Pivot selection: median of three when last - first <= 40, median of nine
// otherwise. The chosen pivot is left at `middle`.
template <typename I, typename Pred>
void median(I first, I middle, I last, const Pred& pred) {
  if (last - first <= 40) {
    // small chunk: a single median of three is enough
    median3(first, middle, last, pred);
    return;
  }

  // median of nine: medians of three groups, then the median of those
  size_t stride = (last - first + 1) / 8;

  median3(first, first + stride, first + 2 * stride, pred);
  median3(middle - stride, middle, middle + stride, pred);
  median3(last - 2 * stride, last - stride, last, pred);
  median3(first + stride, middle, last - stride, pred);
}
7124
// Sorts [begin, end) under `pred`. While more than 32 elements remain, the
// range is three-way partitioned around a median pivot; one side is handled by
// a recursive call and the loop continues on the other. The remaining small
// range is finished with insertion sort.
template <typename I, typename Pred>
void sort(I begin, I end, const Pred& pred) {
  for (; end - begin > 32;) {
    // choose the pivot and move it to the middle position
    I pivot = begin + (end - begin) / 2;
    median(begin, pivot, end - 1, pred);

    // split into three chunks (< = >)
    I equal_first, equal_last;
    partition(begin, pivot, end, pred, &equal_first, &equal_last);

    const bool left_is_larger = equal_first - begin > end - equal_last;

    // recurse into the smaller side, keep looping on the larger one
    if (left_is_larger) {
      sort(equal_last, end, pred);
      end = equal_first;
    } else {
      sort(begin, equal_first, pred);
      begin = equal_last;
    }
  }

  // insertion sort requires a non-empty range
  if (begin != end) insertion_sort(begin, end, pred, &*begin);
}
7150PUGI__NS_END
7151
7152// Allocator used for AST and evaluation stacks
7153PUGI__NS_BEGIN
// Size in bytes of the data area of one XPath memory block. It can be
// overridden at build time by defining PUGIXML_MEMORY_XPATH_PAGE_SIZE and
// defaults to 4096.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif
7161
// Alignment used for XPath allocations: the larger of sizeof(double) and
// sizeof(void*).
static const uintptr_t xpath_memory_block_alignment =
    sizeof(void*) < sizeof(double) ? sizeof(double) : sizeof(void*);
7166
7168 xpath_memory_block* next;
7169 size_t capacity;
7170
7171 union {
7172 char data[xpath_memory_page_size];
7173 double alignment;
7174 };
7175};
7176
7178 xpath_memory_block* _root;
7179 size_t _root_size;
7180
7181public:
7182#ifdef PUGIXML_NO_EXCEPTIONS
7183 jmp_buf* error_handler;
7184#endif
7185
7186 xpath_allocator(xpath_memory_block* root, size_t root_size = 0)
7187 : _root(root), _root_size(root_size) {
7188#ifdef PUGIXML_NO_EXCEPTIONS
7189 error_handler = 0;
7190#endif
7191 }
7192
7193 void* allocate_nothrow(size_t size) {
7194 // round size up to block alignment boundary
7195 size = (size + xpath_memory_block_alignment - 1) &
7196 ~(xpath_memory_block_alignment - 1);
7197
7198 if (_root_size + size <= _root->capacity) {
7199 void* buf = &_root->data[0] + _root_size;
7200 _root_size += size;
7201 return buf;
7202 } else {
7203 // make sure we have at least 1/4th of the page free after allocation to
7204 // satisfy subsequent allocation requests
7205 size_t block_capacity_base = sizeof(_root->data);
7206 size_t block_capacity_req = size + block_capacity_base / 4;
7207 size_t block_capacity = (block_capacity_base > block_capacity_req)
7208 ? block_capacity_base
7209 : block_capacity_req;
7210
7211 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7212
7214 static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7215 if (!block) return 0;
7216
7217 block->next = _root;
7218 block->capacity = block_capacity;
7219
7220 _root = block;
7221 _root_size = size;
7222
7223 return block->data;
7224 }
7225 }
7226
7227 void* allocate(size_t size) {
7228 void* result = allocate_nothrow(size);
7229
7230 if (!result) {
7231#ifdef PUGIXML_NO_EXCEPTIONS
7232 assert(error_handler);
7233 longjmp(*error_handler, 1);
7234#else
7235 throw std::bad_alloc();
7236#endif
7237 }
7238
7239 return result;
7240 }
7241
7242 void* reallocate(void* ptr, size_t old_size, size_t new_size) {
7243 // round size up to block alignment boundary
7244 old_size = (old_size + xpath_memory_block_alignment - 1) &
7245 ~(xpath_memory_block_alignment - 1);
7246 new_size = (new_size + xpath_memory_block_alignment - 1) &
7247 ~(xpath_memory_block_alignment - 1);
7248
7249 // we can only reallocate the last object
7250 assert(ptr == 0 ||
7251 static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7252
7253 // adjust root size so that we have not allocated the object at all
7254 bool only_object = (_root_size == old_size);
7255
7256 if (ptr) _root_size -= old_size;
7257
7258 // allocate a new version (this will obviously reuse the memory if possible)
7259 void* result = allocate(new_size);
7260 assert(result);
7261
7262 // we have a new block
7263 if (result != ptr && ptr) {
7264 // copy old data
7265 assert(new_size >= old_size);
7266 memcpy(result, ptr, old_size);
7267
7268 // free the previous page if it had no other objects
7269 if (only_object) {
7270 assert(_root->data == result);
7271 assert(_root->next);
7272
7273 xpath_memory_block* next = _root->next->next;
7274
7275 if (next) {
7276 // deallocate the whole page, unless it was the first one
7277 xml_memory::deallocate(_root->next);
7278 _root->next = next;
7279 }
7280 }
7281 }
7282
7283 return result;
7284 }
7285
7286 void revert(const xpath_allocator& state) {
7287 // free all new pages
7288 xpath_memory_block* cur = _root;
7289
7290 while (cur != state._root) {
7291 xpath_memory_block* next = cur->next;
7292
7293 xml_memory::deallocate(cur);
7294
7295 cur = next;
7296 }
7297
7298 // restore state
7299 _root = state._root;
7300 _root_size = state._root_size;
7301 }
7302
7303 void release() {
7304 xpath_memory_block* cur = _root;
7305 assert(cur);
7306
7307 while (cur->next) {
7308 xpath_memory_block* next = cur->next;
7309
7310 xml_memory::deallocate(cur);
7311
7312 cur = next;
7313 }
7314 }
7315};
7316
7319 : _target(alloc), _state(*alloc) {}
7320
7321 ~xpath_allocator_capture() { _target->revert(_state); }
7322
7323 xpath_allocator* _target;
7324 xpath_allocator _state;
7325};
7326
7328 xpath_allocator* result;
7329 xpath_allocator* temp;
7330};
7331
7333 xpath_memory_block blocks[2];
7334 xpath_allocator result;
7335 xpath_allocator temp;
7336 xpath_stack stack;
7337
7338#ifdef PUGIXML_NO_EXCEPTIONS
7339 jmp_buf error_handler;
7340#endif
7341
7342 xpath_stack_data() : result(blocks + 0), temp(blocks + 1) {
7343 blocks[0].next = blocks[1].next = 0;
7344 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7345
7346 stack.result = &result;
7347 stack.temp = &temp;
7348
7349#ifdef PUGIXML_NO_EXCEPTIONS
7350 result.error_handler = temp.error_handler = &error_handler;
7351#endif
7352 }
7353
7355 result.release();
7356 temp.release();
7357 }
7358};
7359PUGI__NS_END
7360
7361// String class
7362PUGI__NS_BEGIN
7364 const char_t* _buffer;
7365 bool _uses_heap;
7366 size_t _length_heap;
7367
7368 static char_t* duplicate_string(const char_t* string, size_t length,
7369 xpath_allocator* alloc) {
7370 char_t* result =
7371 static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7372 assert(result);
7373
7374 memcpy(result, string, length * sizeof(char_t));
7375 result[length] = 0;
7376
7377 return result;
7378 }
7379
7380 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap)
7381 : _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) {}
7382
7383public:
7384 static xpath_string from_const(const char_t* str) {
7385 return xpath_string(str, false, 0);
7386 }
7387
7388 static xpath_string from_heap_preallocated(const char_t* begin,
7389 const char_t* end) {
7390 assert(begin <= end && *end == 0);
7391
7392 return xpath_string(begin, true, static_cast<size_t>(end - begin));
7393 }
7394
7395 static xpath_string from_heap(const char_t* begin, const char_t* end,
7396 xpath_allocator* alloc) {
7397 assert(begin <= end);
7398
7399 size_t length = static_cast<size_t>(end - begin);
7400
7401 return length == 0 ? xpath_string()
7402 : xpath_string(duplicate_string(begin, length, alloc),
7403 true, length);
7404 }
7405
7406 xpath_string()
7407 : _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) {}
7408
7409 void append(const xpath_string& o, xpath_allocator* alloc) {
7410 // skip empty sources
7411 if (!*o._buffer) return;
7412
7413 // fast append for constant empty target and constant source
7414 if (!*_buffer && !_uses_heap && !o._uses_heap) {
7415 _buffer = o._buffer;
7416 } else {
7417 // need to make heap copy
7418 size_t target_length = length();
7419 size_t source_length = o.length();
7420 size_t result_length = target_length + source_length;
7421
7422 // allocate new buffer
7423 char_t* result = static_cast<char_t*>(
7424 alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0,
7425 (target_length + 1) * sizeof(char_t),
7426 (result_length + 1) * sizeof(char_t)));
7427 assert(result);
7428
7429 // append first string to the new buffer in case there was no reallocation
7430 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
7431
7432 // append second string to the new buffer
7433 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
7434 result[result_length] = 0;
7435
7436 // finalize
7437 _buffer = result;
7438 _uses_heap = true;
7439 _length_heap = result_length;
7440 }
7441 }
7442
7443 const char_t* c_str() const { return _buffer; }
7444
7445 size_t length() const {
7446 return _uses_heap ? _length_heap : strlength(_buffer);
7447 }
7448
7449 char_t* data(xpath_allocator* alloc) {
7450 // make private heap copy
7451 if (!_uses_heap) {
7452 size_t length_ = strlength(_buffer);
7453
7454 _buffer = duplicate_string(_buffer, length_, alloc);
7455 _uses_heap = true;
7456 _length_heap = length_;
7457 }
7458
7459 return const_cast<char_t*>(_buffer);
7460 }
7461
7462 bool empty() const { return *_buffer == 0; }
7463
7464 bool operator==(const xpath_string& o) const {
7465 return strequal(_buffer, o._buffer);
7466 }
7467
7468 bool operator!=(const xpath_string& o) const {
7469 return !strequal(_buffer, o._buffer);
7470 }
7471
7472 bool uses_heap() const { return _uses_heap; }
7473};
7474PUGI__NS_END
7475
7476PUGI__NS_BEGIN
7477PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) {
7478 while (*pattern && *string == *pattern) {
7479 string++;
7480 pattern++;
7481 }
7482
7483 return *pattern == 0;
7484}
7485
7486PUGI__FN const char_t* find_char(const char_t* s, char_t c) {
7487#ifdef PUGIXML_WCHAR_MODE
7488 return wcschr(s, c);
7489#else
7490 return strchr(s, c);
7491#endif
7492}
7493
7494PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) {
7495#ifdef PUGIXML_WCHAR_MODE
7496 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
7497 return (*p == 0) ? s : wcsstr(s, p);
7498#else
7499 return strstr(s, p);
7500#endif
7501}
7502
7503// Converts symbol to lower case, if it is an ASCII one
7504PUGI__FN char_t tolower_ascii(char_t ch) {
7505 return static_cast<unsigned int>(ch - 'A') < 26
7506 ? static_cast<char_t>(ch | ' ')
7507 : ch;
7508}
7509
7510PUGI__FN xpath_string string_value(const xpath_node& na,
7511 xpath_allocator* alloc) {
7512 if (na.attribute())
7513 return xpath_string::from_const(na.attribute().value());
7514 else {
7515 xml_node n = na.node();
7516
7517 switch (n.type()) {
7518 case node_pcdata:
7519 case node_cdata:
7520 case node_comment:
7521 case node_pi:
7522 return xpath_string::from_const(n.value());
7523
7524 case node_document:
7525 case node_element: {
7526 xpath_string result;
7527
7528 // element nodes can have value if parse_embed_pcdata was used
7529 if (n.value()[0])
7530 result.append(xpath_string::from_const(n.value()), alloc);
7531
7532 xml_node cur = n.first_child();
7533
7534 while (cur && cur != n) {
7535 if (cur.type() == node_pcdata || cur.type() == node_cdata)
7536 result.append(xpath_string::from_const(cur.value()), alloc);
7537
7538 if (cur.first_child())
7539 cur = cur.first_child();
7540 else if (cur.next_sibling())
7541 cur = cur.next_sibling();
7542 else {
7543 while (!cur.next_sibling() && cur != n) cur = cur.parent();
7544
7545 if (cur != n) cur = cur.next_sibling();
7546 }
7547 }
7548
7549 return result;
7550 }
7551
7552 default:
7553 return xpath_string();
7554 }
7555 }
7556}
7557
7558PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) {
7559 assert(ln->parent == rn->parent);
7560
7561 // there is no common ancestor (the shared parent is null), nodes are from
7562 // different documents
7563 if (!ln->parent) return ln < rn;
7564
7565 // determine sibling order
7566 xml_node_struct* ls = ln;
7567 xml_node_struct* rs = rn;
7568
7569 while (ls && rs) {
7570 if (ls == rn) return true;
7571 if (rs == ln) return false;
7572
7573 ls = ls->next_sibling;
7574 rs = rs->next_sibling;
7575 }
7576
7577 // if rn sibling chain ended ln must be before rn
7578 return !rs;
7579}
7580
7581PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) {
7582 // find common ancestor at the same depth, if any
7583 xml_node_struct* lp = ln;
7584 xml_node_struct* rp = rn;
7585
7586 while (lp && rp && lp->parent != rp->parent) {
7587 lp = lp->parent;
7588 rp = rp->parent;
7589 }
7590
7591 // parents are the same!
7592 if (lp && rp) return node_is_before_sibling(lp, rp);
7593
7594 // nodes are at different depths, need to normalize heights
7595 bool left_higher = !lp;
7596
7597 while (lp) {
7598 lp = lp->parent;
7599 ln = ln->parent;
7600 }
7601
7602 while (rp) {
7603 rp = rp->parent;
7604 rn = rn->parent;
7605 }
7606
7607 // one node is the ancestor of the other
7608 if (ln == rn) return left_higher;
7609
7610 // find common ancestor... again
7611 while (ln->parent != rn->parent) {
7612 ln = ln->parent;
7613 rn = rn->parent;
7614 }
7615
7616 return node_is_before_sibling(ln, rn);
7617}
7618
7619PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) {
7620 while (node && node != parent) node = node->parent;
7621
7622 return parent && node == parent;
7623}
7624
7625PUGI__FN const void* document_buffer_order(const xpath_node& xnode) {
7626 xml_node_struct* node = xnode.node().internal_object();
7627
7628 if (node) {
7629 if ((get_document(node).header & xml_memory_page_contents_shared_mask) ==
7630 0) {
7631 if (node->name &&
7632 (node->header &
7633 impl::xml_memory_page_name_allocated_or_shared_mask) == 0)
7634 return node->name;
7635 if (node->value &&
7636 (node->header &
7637 impl::xml_memory_page_value_allocated_or_shared_mask) == 0)
7638 return node->value;
7639 }
7640
7641 return 0;
7642 }
7643
7644 xml_attribute_struct* attr = xnode.attribute().internal_object();
7645
7646 if (attr) {
7647 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) ==
7648 0) {
7649 if ((attr->header &
7650 impl::xml_memory_page_name_allocated_or_shared_mask) == 0)
7651 return attr->name;
7652 if ((attr->header &
7653 impl::xml_memory_page_value_allocated_or_shared_mask) == 0)
7654 return attr->value;
7655 }
7656
7657 return 0;
7658 }
7659
7660 return 0;
7661}
7662
7664 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
7665 // optimized document order based check
7666 const void* lo = document_buffer_order(lhs);
7667 const void* ro = document_buffer_order(rhs);
7668
7669 if (lo && ro) return lo < ro;
7670
7671 // slow comparison
7672 xml_node ln = lhs.node(), rn = rhs.node();
7673
7674 // compare attributes
7675 if (lhs.attribute() && rhs.attribute()) {
7676 // shared parent
7677 if (lhs.parent() == rhs.parent()) {
7678 // determine sibling order
7679 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
7680 if (a == rhs.attribute()) return true;
7681
7682 return false;
7683 }
7684
7685 // compare attribute parents
7686 ln = lhs.parent();
7687 rn = rhs.parent();
7688 } else if (lhs.attribute()) {
7689 // attributes go after the parent element
7690 if (lhs.parent() == rhs.node()) return false;
7691
7692 ln = lhs.parent();
7693 } else if (rhs.attribute()) {
7694 // attributes go after the parent element
7695 if (rhs.parent() == lhs.node()) return true;
7696
7697 rn = rhs.parent();
7698 }
7699
7700 if (ln == rn) return false;
7701
7702 if (!ln || !rn) return ln < rn;
7703
7704 return node_is_before(ln.internal_object(), rn.internal_object());
7705 }
7706};
7707
7709 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const {
7710 if (lhs.attribute())
7711 return rhs.attribute() ? lhs.attribute() < rhs.attribute() : true;
7712 else
7713 return rhs.attribute() ? false : lhs.node() < rhs.node();
7714 }
7715};
7716
// Produces a NaN value.
//
// When the float format is known to be 32-bit IEEE-style (__STDC_IEC_559__ is
// defined, or the FLT_* limits describe radix 2, max exponent 128 and a 24-bit
// mantissa), the value is built from the bit pattern 0x7fc00000 through a
// union and widened to double on return. Otherwise it is computed as
// 0.0 / 0.0.
PUGI__FN double gen_nan() {
#if defined(__STDC_IEC_559__) || \
    ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && \
     (FLT_MANT_DIG - 0 == 24))
  PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
  typedef uint32_t UI; // BCC5 workaround
  union {
    float f;
    UI i;
  } u;
  u.i = 0x7fc00000;
  return u.f;
#else
  // fallback
  // NOTE(review): volatile presumably keeps the compiler from folding or
  // diagnosing the division by zero at compile time
  const volatile double zero = 0.0;
  return zero / zero;
#endif
}
7735
// Returns true when `value` is NaN.
//
// Uses _isnan() on MSVC CRT / Borland, fpclassify() when that macro and
// FP_NAN are available, and otherwise relies on NaN being the only value that
// compares unequal to itself.
PUGI__FN bool is_nan(double value) {
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  return !!_isnan(value);
#elif defined(fpclassify) && defined(FP_NAN)
  return fpclassify(value) == FP_NAN;
#else
  // fallback
  // NOTE(review): volatile presumably prevents the self-comparison from being
  // optimized away
  const volatile double v = value;
  return v != v;
#endif
}
7747
// Maps the special numeric values to their string form.
//
// Returns a static string for NaN ("NaN"), the infinities ("Infinity" /
// "-Infinity") and zero ("0"); returns 0 (null) for every other value, which
// tells the caller to format the number itself.
PUGI__FN const char_t* convert_number_to_string_special(double value) {
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
  // finite: only zero is special
  if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
  if (_isnan(value)) return PUGIXML_TEXT("NaN");
  return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
#elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && \
    defined(FP_ZERO)
  switch (fpclassify(value)) {
    case FP_NAN:
      return PUGIXML_TEXT("NaN");

    case FP_INFINITE:
      return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");

    case FP_ZERO:
      return PUGIXML_TEXT("0");

    default:
      return 0;
  }
#else
  // fallback
  const volatile double v = value;

  if (v == 0) return PUGIXML_TEXT("0");
  // NaN is the only value that differs from itself
  if (v != v) return PUGIXML_TEXT("NaN");
  // zero was handled above, so doubling leaves only an infinity unchanged
  if (v * 2 == v)
    return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
  return 0;
#endif
}
7779
7780PUGI__FN bool convert_number_to_boolean(double value) {
7781 return (value != 0 && !is_nan(value));
7782}
7783
7784PUGI__FN void truncate_zeros(char* begin, char* end) {
7785 while (begin != end && end[-1] == '0') end--;
7786
7787 *end = 0;
7788}
7789
// gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
//
// Both variants write into `buffer` (of `buffer_size` chars) and return:
//   *out_mantissa - pointer into `buffer` to a zero-terminated digit string
//                   with trailing zeros removed
//   *out_exponent - decimal exponent such that value = 0.<mantissa> * 10^exp
// The sign of `value` is not part of the mantissa string.
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && \
    !defined(_WIN32_WCE)
PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer,
                                                  size_t buffer_size,
                                                  char** out_mantissa,
                                                  int* out_exponent) {
  // get base values
  // (_ecvt_s yields DBL_DIG + 1 digits plus the exponent and sign separately)
  int sign, exponent;
  _ecvt_s(buffer, buffer_size, value, DBL_DIG + 1, &exponent, &sign);

  // truncate redundant zeros
  truncate_zeros(buffer, buffer + strlen(buffer));

  // fill results
  *out_mantissa = buffer;
  *out_exponent = exponent;
}
#else
PUGI__FN void convert_number_to_mantissa_exponent(double value, char* buffer,
                                                  size_t buffer_size,
                                                  char** out_mantissa,
                                                  int* out_exponent) {
  // get a scientific notation value with IEEE DBL_DIG decimals
  // (sprintf is unbounded; the assert below only detects an overflow after
  // the fact, so the caller's buffer must be large enough for "%.*e" output)
  sprintf(buffer, "%.*e", DBL_DIG, value);
  assert(strlen(buffer) < buffer_size);
  // NOTE(review): presumably silences the unused-parameter warning when
  // assert is compiled out
  (void)!buffer_size;

  // get the exponent (possibly negative)
  char* exponent_string = strchr(buffer, 'e');
  assert(exponent_string);

  int exponent = atoi(exponent_string + 1);

  // extract mantissa string: skip sign
  char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
  assert(mantissa[0] != '0' && mantissa[1] == '.');

  // divide mantissa by 10 to eliminate integer part
  // (the leading digit overwrites the '.', then the pointer moves onto it, so
  // "d.ddd" becomes the digit run "dddd" and the exponent grows by one)
  mantissa[1] = mantissa[0];
  mantissa++;
  exponent++;

  // remove extra mantissa digits and zero-terminate mantissa
  // (the terminator lands at or before the 'e', cutting off the exponent text)
  truncate_zeros(mantissa, exponent_string);

  // fill results
  *out_mantissa = mantissa;
  *out_exponent = exponent;
}
#endif
7841
// Converts a number to its string form in plain decimal notation (no
// exponent), allocating the result from `alloc`.
//
// Special values (NaN, infinities, zero) are returned as constant strings via
// convert_number_to_string_special(). Everything else is assembled from the
// mantissa digits and decimal exponent: optional '-', integer digits (or a
// single '0'), and, when mantissa digits remain, '.' followed by the fraction.
PUGI__FN xpath_string convert_number_to_string(double value,
                                               xpath_allocator* alloc) {
  // try special number conversion
  const char_t* special = convert_number_to_string_special(value);
  if (special) return xpath_string::from_const(special);

  // get mantissa + exponent form
  char mantissa_buffer[32];

  char* mantissa;
  int exponent;
  convert_number_to_mantissa_exponent(
      value, mantissa_buffer, sizeof(mantissa_buffer), &mantissa, &exponent);

  // allocate a buffer of suitable length for the number
  // (digits + |exponent| padding zeros + room for sign, leading '0', '.' and
  // the terminator)
  size_t result_size =
      strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
  char_t* result =
      static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
  assert(result);

  // make the number!
  char_t* s = result;

  // sign
  if (value < 0) *s++ = '-';

  // integer part
  if (exponent <= 0) {
    *s++ = '0';
  } else {
    // emit `exponent` digits, padding with '0' once the mantissa runs out
    while (exponent > 0) {
      assert(*mantissa == 0 ||
             static_cast<unsigned int>(static_cast<unsigned int>(*mantissa) -
                                       '0') <= 9);
      *s++ = *mantissa ? *mantissa++ : '0';
      exponent--;
    }
  }

  // fractional part
  if (*mantissa) {
    // decimal point
    *s++ = '.';

    // extra zeroes from negative exponent
    while (exponent < 0) {
      *s++ = '0';
      exponent++;
    }

    // extra mantissa digits
    while (*mantissa) {
      assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
      *s++ = *mantissa++;
    }
  }

  // zero-terminate
  assert(s < result + result_size);
  *s = 0;

  return xpath_string::from_heap_preallocated(result, s);
}
7906
7907PUGI__FN bool check_string_to_number_format(const char_t* string) {
7908 // parse leading whitespace
7909 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
7910
7911 // parse sign
7912 if (*string == '-') ++string;
7913
7914 if (!*string) return false;
7915
7916 // if there is no integer part, there should be a decimal part with at least
7917 // one digit
7918 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) &&
7919 (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit)))
7920 return false;
7921
7922 // parse integer part
7923 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
7924
7925 // parse decimal part
7926 if (*string == '.') {
7927 ++string;
7928
7929 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
7930 }
7931
7932 // parse trailing whitespace
7933 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
7934
7935 return *string == 0;
7936}
7937
7938PUGI__FN double convert_string_to_number(const char_t* string) {
7939 // check string format
7940 if (!check_string_to_number_format(string)) return gen_nan();
7941
7942 // parse string
7943#ifdef PUGIXML_WCHAR_MODE
7944 return wcstod(string, 0);
7945#else
7946 return strtod(string, 0);
7947#endif
7948}
7949
7950PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32],
7951 const char_t* begin,
7952 const char_t* end,
7953 double* out_result) {
7954 size_t length = static_cast<size_t>(end - begin);
7955 char_t* scratch = buffer;
7956
7957 if (length >= sizeof(buffer) / sizeof(buffer[0])) {
7958 // need to make dummy on-heap copy
7959 scratch = static_cast<char_t*>(
7960 xml_memory::allocate((length + 1) * sizeof(char_t)));
7961 if (!scratch) return false;
7962 }
7963
7964 // copy string to zero-terminated buffer and perform conversion
7965 memcpy(scratch, begin, length * sizeof(char_t));
7966 scratch[length] = 0;
7967
7968 *out_result = convert_string_to_number(scratch);
7969
7970 // free dummy buffer
7971 if (scratch != buffer) xml_memory::deallocate(scratch);
7972
7973 return true;
7974}
7975
7976PUGI__FN double round_nearest(double value) { return floor(value + 0.5); }
7977
7978PUGI__FN double round_nearest_nzero(double value) {
7979 // same as round_nearest, but returns -0 for [-0.5, -0]
7980 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5,
7981 // -0] and +0 for +0)
7982 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
7983}
7984
7985PUGI__FN const char_t* qualified_name(const xpath_node& node) {
7986 return node.attribute() ? node.attribute().name() : node.node().name();
7987}
7988
7989PUGI__FN const char_t* local_name(const xpath_node& node) {
7990 const char_t* name = qualified_name(node);
7991 const char_t* p = find_char(name, ':');
7992
7993 return p ? p + 1 : name;
7994}
7995
7997 const char_t* prefix;
7998 size_t prefix_length;
7999
8000 namespace_uri_predicate(const char_t* name) {
8001 const char_t* pos = find_char(name, ':');
8002
8003 prefix = pos ? name : 0;
8004 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8005 }
8006
8007 bool operator()(xml_attribute a) const {
8008 const char_t* name = a.name();
8009
8010 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8011
8012 return prefix ? name[5] == ':' &&
8013 strequalrange(name + 6, prefix, prefix_length)
8014 : name[5] == 0;
8015 }
8016};
8017
8018PUGI__FN const char_t* namespace_uri(xml_node node) {
8019 namespace_uri_predicate pred = node.name();
8020
8021 xml_node p = node;
8022
8023 while (p) {
8024 xml_attribute a = p.find_attribute(pred);
8025
8026 if (a) return a.value();
8027
8028 p = p.parent();
8029 }
8030
8031 return PUGIXML_TEXT("");
8032}
8033
8034PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) {
8035 namespace_uri_predicate pred = attr.name();
8036
8037 // Default namespace does not apply to attributes
8038 if (!pred.prefix) return PUGIXML_TEXT("");
8039
8040 xml_node p = parent;
8041
8042 while (p) {
8043 xml_attribute a = p.find_attribute(pred);
8044
8045 if (a) return a.value();
8046
8047 p = p.parent();
8048 }
8049
8050 return PUGIXML_TEXT("");
8051}
8052
8053PUGI__FN const char_t* namespace_uri(const xpath_node& node) {
8054 return node.attribute() ? namespace_uri(node.attribute(), node.parent())
8055 : namespace_uri(node.node());
8056}
8057
8058PUGI__FN char_t* normalize_space(char_t* buffer) {
8059 char_t* write = buffer;
8060
8061 for (char_t* it = buffer; *it;) {
8062 char_t ch = *it++;
8063
8064 if (PUGI__IS_CHARTYPE(ch, ct_space)) {
8065 // replace whitespace sequence with single space
8066 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8067
8068 // avoid leading spaces
8069 if (write != buffer) *write++ = ' ';
8070 } else
8071 *write++ = ch;
8072 }
8073
8074 // remove trailing space
8075 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8076
8077 // zero-terminate
8078 *write = 0;
8079
8080 return write;
8081}
8082
8083PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to,
8084 size_t to_length) {
8085 char_t* write = buffer;
8086
8087 while (*buffer) {
8088 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8089
8090 const char_t* pos = find_char(from, ch);
8091
8092 if (!pos)
8093 *write++ = ch; // do not process
8094 else if (static_cast<size_t>(pos - from) < to_length)
8095 *write++ = to[pos - from]; // replace
8096 }
8097
8098 // zero-terminate
8099 *write = 0;
8100
8101 return write;
8102}
8103
8104PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc,
8105 const char_t* from,
8106 const char_t* to) {
8107 unsigned char table[128] = {0};
8108
8109 while (*from) {
8110 unsigned int fc = static_cast<unsigned int>(*from);
8111 unsigned int tc = static_cast<unsigned int>(*to);
8112
8113 if (fc >= 128 || tc >= 128) return 0;
8114
8115 // code=128 means "skip character"
8116 if (!table[fc]) table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8117
8118 from++;
8119 if (tc) to++;
8120 }
8121
8122 for (int i = 0; i < 128; ++i)
8123 if (!table[i]) table[i] = static_cast<unsigned char>(i);
8124
8125 void* result = alloc->allocate_nothrow(sizeof(table));
8126
8127 if (result) {
8128 memcpy(result, table, sizeof(table));
8129 }
8130
8131 return static_cast<unsigned char*>(result);
8132}
8133
// Translates `buffer` in place using a 128-entry table in which table[c] is
// the replacement for character c and the value 128 marks a character to be
// removed. Characters with codes >= 128 are copied unchanged.
// Returns a pointer to the new terminating zero.
PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) {
  char_t* write = buffer;

  while (*buffer) {
    char_t ch = *buffer++;
    unsigned int index = static_cast<unsigned int>(ch);

    if (index < 128) {
      unsigned char code = table[index];

      // code=128 means "skip character" (table size is 128 so 128 can be a
      // special value) this code skips these characters without extra branches
      // (code >> 7 is 1 only for 128, so the write pointer stays put and the
      // stored value is overwritten by the next character or the terminator)
      *write = static_cast<char_t>(code);
      write += 1 - (code >> 7);
    } else {
      *write++ = ch;
    }
  }

  // zero-terminate
  *write = 0;

  return write;
}
8158
8159inline bool is_xpath_attribute(const char_t* name) {
8160 return !(starts_with(name, PUGIXML_TEXT("xmlns")) &&
8161 (name[5] == 0 || name[5] == ':'));
8162}
8163
// XPath variable holding a boolean, initialised to false.
struct xpath_variable_boolean : xpath_variable {
  xpath_variable_boolean() : xpath_variable(xpath_type_boolean), value(false) {}

  bool value;
  // Start of the variable name. new_xpath_variable<T> allocates extra room
  // after the object and copies the whole name over this member, so it has to
  // remain the last member.
  char_t name[1];
};
8170
// XPath variable holding a number, initialised to 0.
struct xpath_variable_number : xpath_variable {
  xpath_variable_number() : xpath_variable(xpath_type_number), value(0) {}

  double value;
  // Start of the variable name. new_xpath_variable<T> allocates extra room
  // after the object and copies the whole name over this member, so it has to
  // remain the last member.
  char_t name[1];
};
8177
8178struct xpath_variable_string : xpath_variable {
8179 xpath_variable_string() : xpath_variable(xpath_type_string), value(0) {}
8180
8182 if (value) xml_memory::deallocate(value);
8183 }
8184
8185 char_t* value;
8186 char_t name[1];
8187};
8188
// XPath variable holding a node set (default-constructed xpath_node_set).
struct xpath_variable_node_set : xpath_variable {
  xpath_variable_node_set() : xpath_variable(xpath_type_node_set) {}

  xpath_node_set value;
  // Start of the variable name. new_xpath_variable<T> allocates extra room
  // after the object and copies the whole name over this member, so it has to
  // remain the last member.
  char_t name[1];
};
8195
// File-local, default-constructed node set.
// NOTE(review): presumably handed out by reference wherever an empty node set
// is needed - confirm against the callers.
static const xpath_node_set dummy_node_set;
8197
8198PUGI__FN unsigned int hash_string(const char_t* str) {
8199 // Jenkins one-at-a-time hash
8200 // (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8201 unsigned int result = 0;
8202
8203 while (*str) {
8204 result += static_cast<unsigned int>(*str++);
8205 result += result << 10;
8206 result ^= result >> 6;
8207 }
8208
8209 result += result << 3;
8210 result ^= result >> 11;
8211 result += result << 15;
8212
8213 return result;
8214}
8215
8216template <typename T>
8217PUGI__FN T* new_xpath_variable(const char_t* name) {
8218 size_t length = strlength(name);
8219 if (length == 0) return 0; // empty variable names are invalid
8220
8221 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate
8222 // additional length characters
8223 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8224 if (!memory) return 0;
8225
8226 T* result = new (memory) T();
8227
8228 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8229
8230 return result;
8231}
8232
8233PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type,
8234 const char_t* name) {
8235 switch (type) {
8236 case xpath_type_node_set:
8237 return new_xpath_variable<xpath_variable_node_set>(name);
8238
8239 case xpath_type_number:
8240 return new_xpath_variable<xpath_variable_number>(name);
8241
8242 case xpath_type_string:
8243 return new_xpath_variable<xpath_variable_string>(name);
8244
8245 case xpath_type_boolean:
8246 return new_xpath_variable<xpath_variable_boolean>(name);
8247
8248 default:
8249 return 0;
8250 }
8251}
8252
// Destroys a variable created by new_xpath_variable<T>. The object was
// placement-constructed in storage obtained from xml_memory, so the destructor
// is invoked explicitly before the raw block is handed back to xml_memory.
template <typename T>
PUGI__FN void delete_xpath_variable(T* var) {
  var->~T();
  xml_memory::deallocate(var);
}
8258
8259PUGI__FN void delete_xpath_variable(xpath_value_type type,
8260 xpath_variable* var) {
8261 switch (type) {
8262 case xpath_type_node_set:
8263 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8264 break;
8265
8266 case xpath_type_number:
8267 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8268 break;
8269
8270 case xpath_type_string:
8271 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8272 break;
8273
8274 case xpath_type_boolean:
8275 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8276 break;
8277
8278 default:
8279 assert(false && "Invalid variable type");
8280 }
8281}
8282
8283PUGI__FN bool copy_xpath_variable(xpath_variable* lhs,
8284 const xpath_variable* rhs) {
8285 switch (rhs->type()) {
8286 case xpath_type_node_set:
8287 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8288
8289 case xpath_type_number:
8290 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8291
8292 case xpath_type_string:
8293 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8294
8295 case xpath_type_boolean:
8296 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8297
8298 default:
8299 assert(false && "Invalid variable type");
8300 return false;
8301 }
8302}
8303
8304PUGI__FN bool get_variable_scratch(char_t (&buffer)[32],
8305 xpath_variable_set* set, const char_t* begin,
8306 const char_t* end,
8307 xpath_variable** out_result) {
8308 size_t length = static_cast<size_t>(end - begin);
8309 char_t* scratch = buffer;
8310
8311 if (length >= sizeof(buffer) / sizeof(buffer[0])) {
8312 // need to make dummy on-heap copy
8313 scratch = static_cast<char_t*>(
8314 xml_memory::allocate((length + 1) * sizeof(char_t)));
8315 if (!scratch) return false;
8316 }
8317
8318 // copy string to zero-terminated buffer and perform lookup
8319 memcpy(scratch, begin, length * sizeof(char_t));
8320 scratch[length] = 0;
8321
8322 *out_result = set->get(scratch);
8323
8324 // free dummy buffer
8325 if (scratch != buffer) xml_memory::deallocate(scratch);
8326
8327 return true;
8328}
8329PUGI__NS_END
8330
8331// Internal node set class
8332PUGI__NS_BEGIN
8333PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin,
8334 const xpath_node* end) {
8335 if (end - begin < 2) return xpath_node_set::type_sorted;
8336
8338
8339 bool first = cmp(begin[0], begin[1]);
8340
8341 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8342 if (cmp(it[0], it[1]) != first) return xpath_node_set::type_unsorted;
8343
8344 return first ? xpath_node_set::type_sorted
8345 : xpath_node_set::type_sorted_reverse;
8346}
8347
8348PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end,
8349 xpath_node_set::type_t type,
8350 bool rev) {
8351 xpath_node_set::type_t order =
8352 rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8353
8354 if (type == xpath_node_set::type_unsorted) {
8355 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8356
8357 if (sorted == xpath_node_set::type_unsorted) {
8358 sort(begin, end, document_order_comparator());
8359
8360 type = xpath_node_set::type_sorted;
8361 } else
8362 type = sorted;
8363 }
8364
8365 if (type != order) reverse(begin, end);
8366
8367 return order;
8368}
8369
8370PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end,
8371 xpath_node_set::type_t type) {
8372 if (begin == end) return xpath_node();
8373
8374 switch (type) {
8375 case xpath_node_set::type_sorted:
8376 return *begin;
8377
8378 case xpath_node_set::type_sorted_reverse:
8379 return *(end - 1);
8380
8381 case xpath_node_set::type_unsorted:
8382 return *min_element(begin, end, document_order_comparator());
8383
8384 default:
8385 assert(false && "Invalid node set type");
8386 return xpath_node();
8387 }
8388}
8389
8391 xpath_node_set::type_t _type;
8392
8393 xpath_node* _begin;
8394 xpath_node* _end;
8395 xpath_node* _eos;
8396
8397public:
8399 : _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) {}
8400
8401 xpath_node* begin() const { return _begin; }
8402
8403 xpath_node* end() const { return _end; }
8404
8405 bool empty() const { return _begin == _end; }
8406
8407 size_t size() const { return static_cast<size_t>(_end - _begin); }
8408
8409 xpath_node first() const { return xpath_first(_begin, _end, _type); }
8410
8411 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
8412
8413 void push_back(const xpath_node& node, xpath_allocator* alloc) {
8414 if (_end != _eos)
8415 *_end++ = node;
8416 else
8417 push_back_grow(node, alloc);
8418 }
8419
8420 void append(const xpath_node* begin_, const xpath_node* end_,
8421 xpath_allocator* alloc) {
8422 if (begin_ == end_) return;
8423
8424 size_t size_ = static_cast<size_t>(_end - _begin);
8425 size_t capacity = static_cast<size_t>(_eos - _begin);
8426 size_t count = static_cast<size_t>(end_ - begin_);
8427
8428 if (size_ + count > capacity) {
8429 // reallocate the old array or allocate a new one
8430 xpath_node* data = static_cast<xpath_node*>(
8431 alloc->reallocate(_begin, capacity * sizeof(xpath_node),
8432 (size_ + count) * sizeof(xpath_node)));
8433 assert(data);
8434
8435 // finalize
8436 _begin = data;
8437 _end = data + size_;
8438 _eos = data + size_ + count;
8439 }
8440
8441 memcpy(_end, begin_, count * sizeof(xpath_node));
8442 _end += count;
8443 }
8444
8445 void sort_do() { _type = xpath_sort(_begin, _end, _type, false); }
8446
8447 void truncate(xpath_node* pos) {
8448 assert(_begin <= pos && pos <= _end);
8449
8450 _end = pos;
8451 }
8452
8453 void remove_duplicates() {
8454 if (_type == xpath_node_set::type_unsorted)
8455 sort(_begin, _end, duplicate_comparator());
8456
8457 _end = unique(_begin, _end);
8458 }
8459
8460 xpath_node_set::type_t type() const { return _type; }
8461
8462 void set_type(xpath_node_set::type_t value) { _type = value; }
8463};
8464
8465PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(
8466 const xpath_node& node, xpath_allocator* alloc) {
8467 size_t capacity = static_cast<size_t>(_eos - _begin);
8468
8469 // get new capacity (1.5x rule)
8470 size_t new_capacity = capacity + capacity / 2 + 1;
8471
8472 // reallocate the old array or allocate a new one
8473 xpath_node* data = static_cast<xpath_node*>(
8474 alloc->reallocate(_begin, capacity * sizeof(xpath_node),
8475 new_capacity * sizeof(xpath_node)));
8476 assert(data);
8477
8478 // finalize
8479 _begin = data;
8480 _end = data + capacity;
8481 _eos = data + new_capacity;
8482
8483 // push
8484 *_end++ = node;
8485}
8486PUGI__NS_END
8487
8488PUGI__NS_BEGIN
8490 xpath_node n;
8491 size_t position, size;
8492
8493 xpath_context(const xpath_node& n_, size_t position_, size_t size_)
8494 : n(n_), position(position_), size(size_) {}
8495};
8496
// Token kinds produced by xpath_lexer. The trailing comments give the source
// text that xpath_lexer::next() maps to each kind.
enum lexeme_t {
  lex_none = 0,            // unrecognized or malformed input
  lex_equal,               // =
  lex_not_equal,           // !=
  lex_less,                // <
  lex_greater,             // >
  lex_less_or_equal,       // <=
  lex_greater_or_equal,    // >=
  lex_plus,                // +
  lex_minus,               // -
  lex_multiply,            // *
  lex_union,               // |
  lex_var_ref,             // $name or $prefix:name (contents exclude the $)
  lex_open_brace,          // (
  lex_close_brace,         // )
  lex_quoted_string,       // '...' or "..." (contents exclude the quotes)
  lex_number,              // digits, digits.digits or .digits
  lex_slash,               // /
  lex_double_slash,        // //
  lex_open_square_brace,   // [
  lex_close_square_brace,  // ]
  lex_string,              // name, prefix:name or prefix:*
  lex_comma,               // ,
  lex_axis_attribute,      // @
  lex_dot,                 // .
  lex_double_dot,          // ..
  lex_double_colon,        // ::
  lex_eof                  // end of the query string
};
8526
8528 const char_t* begin;
8529 const char_t* end;
8530
8531 xpath_lexer_string() : begin(0), end(0) {}
8532
8533 bool operator==(const char_t* other) const {
8534 size_t length = static_cast<size_t>(end - begin);
8535
8536 return strequalrange(other, begin, length);
8537 }
8538};
8539
8541 const char_t* _cur;
8542 const char_t* _cur_lexeme_pos;
8543 xpath_lexer_string _cur_lexeme_contents;
8544
8545 lexeme_t _cur_lexeme;
8546
8547public:
8548 explicit xpath_lexer(const char_t* query) : _cur(query) { next(); }
8549
8550 const char_t* state() const { return _cur; }
8551
8552 void next() {
8553 const char_t* cur = _cur;
8554
8555 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
8556
8557 // save lexeme position for error reporting
8558 _cur_lexeme_pos = cur;
8559
8560 switch (*cur) {
8561 case 0:
8562 _cur_lexeme = lex_eof;
8563 break;
8564
8565 case '>':
8566 if (*(cur + 1) == '=') {
8567 cur += 2;
8568 _cur_lexeme = lex_greater_or_equal;
8569 } else {
8570 cur += 1;
8571 _cur_lexeme = lex_greater;
8572 }
8573 break;
8574
8575 case '<':
8576 if (*(cur + 1) == '=') {
8577 cur += 2;
8578 _cur_lexeme = lex_less_or_equal;
8579 } else {
8580 cur += 1;
8581 _cur_lexeme = lex_less;
8582 }
8583 break;
8584
8585 case '!':
8586 if (*(cur + 1) == '=') {
8587 cur += 2;
8588 _cur_lexeme = lex_not_equal;
8589 } else {
8590 _cur_lexeme = lex_none;
8591 }
8592 break;
8593
8594 case '=':
8595 cur += 1;
8596 _cur_lexeme = lex_equal;
8597
8598 break;
8599
8600 case '+':
8601 cur += 1;
8602 _cur_lexeme = lex_plus;
8603
8604 break;
8605
8606 case '-':
8607 cur += 1;
8608 _cur_lexeme = lex_minus;
8609
8610 break;
8611
8612 case '*':
8613 cur += 1;
8614 _cur_lexeme = lex_multiply;
8615
8616 break;
8617
8618 case '|':
8619 cur += 1;
8620 _cur_lexeme = lex_union;
8621
8622 break;
8623
8624 case '$':
8625 cur += 1;
8626
8627 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
8628 _cur_lexeme_contents.begin = cur;
8629
8630 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
8631
8632 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
8633 {
8634 cur++; // :
8635
8636 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
8637 }
8638
8639 _cur_lexeme_contents.end = cur;
8640
8641 _cur_lexeme = lex_var_ref;
8642 } else {
8643 _cur_lexeme = lex_none;
8644 }
8645
8646 break;
8647
8648 case '(':
8649 cur += 1;
8650 _cur_lexeme = lex_open_brace;
8651
8652 break;
8653
8654 case ')':
8655 cur += 1;
8656 _cur_lexeme = lex_close_brace;
8657
8658 break;
8659
8660 case '[':
8661 cur += 1;
8662 _cur_lexeme = lex_open_square_brace;
8663
8664 break;
8665
8666 case ']':
8667 cur += 1;
8668 _cur_lexeme = lex_close_square_brace;
8669
8670 break;
8671
8672 case ',':
8673 cur += 1;
8674 _cur_lexeme = lex_comma;
8675
8676 break;
8677
8678 case '/':
8679 if (*(cur + 1) == '/') {
8680 cur += 2;
8681 _cur_lexeme = lex_double_slash;
8682 } else {
8683 cur += 1;
8684 _cur_lexeme = lex_slash;
8685 }
8686 break;
8687
8688 case '.':
8689 if (*(cur + 1) == '.') {
8690 cur += 2;
8691 _cur_lexeme = lex_double_dot;
8692 } else if (PUGI__IS_CHARTYPEX(*(cur + 1), ctx_digit)) {
8693 _cur_lexeme_contents.begin = cur; // .
8694
8695 ++cur;
8696
8697 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
8698
8699 _cur_lexeme_contents.end = cur;
8700
8701 _cur_lexeme = lex_number;
8702 } else {
8703 cur += 1;
8704 _cur_lexeme = lex_dot;
8705 }
8706 break;
8707
8708 case '@':
8709 cur += 1;
8710 _cur_lexeme = lex_axis_attribute;
8711
8712 break;
8713
8714 case '"':
8715 case '\'': {
8716 char_t terminator = *cur;
8717
8718 ++cur;
8719
8720 _cur_lexeme_contents.begin = cur;
8721 while (*cur && *cur != terminator) cur++;
8722 _cur_lexeme_contents.end = cur;
8723
8724 if (!*cur)
8725 _cur_lexeme = lex_none;
8726 else {
8727 cur += 1;
8728 _cur_lexeme = lex_quoted_string;
8729 }
8730
8731 break;
8732 }
8733
8734 case ':':
8735 if (*(cur + 1) == ':') {
8736 cur += 2;
8737 _cur_lexeme = lex_double_colon;
8738 } else {
8739 _cur_lexeme = lex_none;
8740 }
8741 break;
8742
8743 default:
8744 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) {
8745 _cur_lexeme_contents.begin = cur;
8746
8747 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
8748
8749 if (*cur == '.') {
8750 cur++;
8751
8752 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
8753 }
8754
8755 _cur_lexeme_contents.end = cur;
8756
8757 _cur_lexeme = lex_number;
8758 } else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) {
8759 _cur_lexeme_contents.begin = cur;
8760
8761 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
8762
8763 if (cur[0] == ':') {
8764 if (cur[1] == '*') // namespace test ncname:*
8765 {
8766 cur += 2; // :*
8767 } else if (PUGI__IS_CHARTYPEX(cur[1],
8768 ctx_symbol)) // namespace test qname
8769 {
8770 cur++; // :
8771
8772 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
8773 }
8774 }
8775
8776 _cur_lexeme_contents.end = cur;
8777
8778 _cur_lexeme = lex_string;
8779 } else {
8780 _cur_lexeme = lex_none;
8781 }
8782 }
8783
8784 _cur = cur;
8785 }
8786
8787 lexeme_t current() const { return _cur_lexeme; }
8788
8789 const char_t* current_pos() const { return _cur_lexeme_pos; }
8790
8791 const xpath_lexer_string& contents() const {
8792 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number ||
8793 _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
8794
8795 return _cur_lexeme_contents;
8796 }
8797};
8798
// Kinds of XPath AST nodes. In the comments, left/right/third name the
// operand sub-expressions of the node.
enum ast_type_t {
  ast_unknown,
  ast_op_or, // left or right
  ast_op_and, // left and right
  ast_op_equal, // left = right
  ast_op_not_equal, // left != right
  ast_op_less, // left < right
  ast_op_greater, // left > right
  ast_op_less_or_equal, // left <= right
  ast_op_greater_or_equal, // left >= right
  ast_op_add, // left + right
  ast_op_subtract, // left - right
  ast_op_multiply, // left * right
  ast_op_divide, // left / right
  ast_op_mod, // left mod right
  ast_op_negate, // -left (unary minus; TODO confirm against the evaluator)
  ast_op_union, // left | right
  ast_predicate, // apply predicate to set; next points to next predicate
  ast_filter, // select * from left where right
  ast_string_constant, // string constant
  ast_number_constant, // number constant
  ast_variable, // variable
  ast_func_last, // last()
  ast_func_position, // position()
  ast_func_count, // count(left)
  ast_func_id, // id(left)
  ast_func_local_name_0, // local-name()
  ast_func_local_name_1, // local-name(left)
  ast_func_namespace_uri_0, // namespace-uri()
  ast_func_namespace_uri_1, // namespace-uri(left)
  ast_func_name_0, // name()
  ast_func_name_1, // name(left)
  ast_func_string_0, // string()
  ast_func_string_1, // string(left)
  ast_func_concat, // concat(left, right, siblings)
  ast_func_starts_with, // starts-with(left, right)
  ast_func_contains, // contains(left, right)
  ast_func_substring_before, // substring-before(left, right)
  ast_func_substring_after, // substring-after(left, right)
  ast_func_substring_2, // substring(left, right)
  ast_func_substring_3, // substring(left, right, third)
  ast_func_string_length_0, // string-length()
  ast_func_string_length_1, // string-length(left)
  ast_func_normalize_space_0, // normalize-space()
  ast_func_normalize_space_1, // normalize-space(left)
  ast_func_translate, // translate(left, right, third)
  ast_func_boolean, // boolean(left)
  ast_func_not, // not(left)
  ast_func_true, // true()
  ast_func_false, // false()
  ast_func_lang, // lang(left)
  ast_func_number_0, // number()
  ast_func_number_1, // number(left)
  ast_func_sum, // sum(left)
  ast_func_floor, // floor(left)
  ast_func_ceiling, // ceiling(left)
  ast_func_round, // round(left)
  ast_step, // process set left with step
  ast_step_root, // select root node

  ast_opt_translate_table, // translate(left, right, third) where right/third
                           // are constants
  ast_opt_compare_attribute // @name = 'string'
};
8863
// Axis of a location step. The enumerators mirror the axis names of XPath 1.0
// (ancestor, ancestor-or-self, attribute, ...).
enum axis_t {
  axis_ancestor,
  axis_ancestor_or_self,
  axis_attribute,
  axis_child,
  axis_descendant,
  axis_descendant_or_self,
  axis_following,
  axis_following_sibling,
  axis_namespace,
  axis_parent,
  axis_preceding,
  axis_preceding_sibling,
  axis_self
};
8879
// Kind of node test attached to a location step.
// NOTE(review): the per-enumerator notes are inferred from the names and the
// XPath 1.0 grammar - confirm against the parser.
enum nodetest_t {
  nodetest_none,
  nodetest_name, // presumably a name test
  nodetest_type_node, // presumably node()
  nodetest_type_comment, // presumably comment()
  nodetest_type_pi, // presumably processing-instruction()
  nodetest_type_text, // presumably text()
  nodetest_pi, // presumably processing-instruction('target')
  nodetest_all, // presumably *
  nodetest_all_in_namespace // presumably prefix:*
};
8891
// Classification of a predicate expression.
// NOTE(review): presumably selects an evaluation strategy (general predicate,
// position-invariant, constant number, constant 1) - confirm against the code
// that applies predicates.
enum predicate_t {
  predicate_default,
  predicate_posinv,
  predicate_constant,
  predicate_constant_one
};
8898
// How much of a node set the caller needs; eval_once() turns this into an
// "stop after the first match" flag.
enum nodeset_eval_t {
  nodeset_eval_all = 0,  // the complete set is required
  nodeset_eval_any,      // any single node is enough (e.g. emptiness checks)
  nodeset_eval_first     // only the first node is required
};
8900
8901template <axis_t N>
8903 static const axis_t axis;
8904};
8905
8906template <axis_t N>
8907const axis_t axis_to_type<N>::axis = N;
8908
8910private:
8911 // node type
8912 char _type;
8913 char _rettype;
8914
8915 // for ast_step
8916 char _axis;
8917
8918 // for ast_step/ast_predicate/ast_filter
8919 char _test;
8920
8921 // tree node structure
8922 xpath_ast_node* _left;
8923 xpath_ast_node* _right;
8924 xpath_ast_node* _next;
8925
8926 union {
8927 // value for ast_string_constant
8928 const char_t* string;
8929 // value for ast_number_constant
8930 double number;
8931 // variable for ast_variable
8932 xpath_variable* variable;
8933 // node test for ast_step (node name/namespace/node type/pi target)
8934 const char_t* nodetest;
8935 // table for ast_opt_translate_table
8936 const unsigned char* table;
8937 } _data;
8938
8940 xpath_ast_node& operator=(const xpath_ast_node&);
8941
8942 template <class Comp>
8943 static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs,
8944 const xpath_context& c, const xpath_stack& stack,
8945 const Comp& comp) {
8946 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
8947
8948 if (lt != xpath_type_node_set && rt != xpath_type_node_set) {
8949 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
8950 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
8951 else if (lt == xpath_type_number || rt == xpath_type_number)
8952 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
8953 else if (lt == xpath_type_string || rt == xpath_type_string) {
8954 xpath_allocator_capture cr(stack.result);
8955
8956 xpath_string ls = lhs->eval_string(c, stack);
8957 xpath_string rs = rhs->eval_string(c, stack);
8958
8959 return comp(ls, rs);
8960 }
8961 } else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
8962 xpath_allocator_capture cr(stack.result);
8963
8964 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
8965 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
8966
8967 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
8968 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
8969 xpath_allocator_capture cri(stack.result);
8970
8971 if (comp(string_value(*li, stack.result),
8972 string_value(*ri, stack.result)))
8973 return true;
8974 }
8975
8976 return false;
8977 } else {
8978 if (lt == xpath_type_node_set) {
8979 swap(lhs, rhs);
8980 swap(lt, rt);
8981 }
8982
8983 if (lt == xpath_type_boolean)
8984 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
8985 else if (lt == xpath_type_number) {
8986 xpath_allocator_capture cr(stack.result);
8987
8988 double l = lhs->eval_number(c, stack);
8989 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
8990
8991 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
8992 xpath_allocator_capture cri(stack.result);
8993
8994 if (comp(l, convert_string_to_number(
8995 string_value(*ri, stack.result).c_str())))
8996 return true;
8997 }
8998
8999 return false;
9000 } else if (lt == xpath_type_string) {
9001 xpath_allocator_capture cr(stack.result);
9002
9003 xpath_string l = lhs->eval_string(c, stack);
9004 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9005
9006 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
9007 xpath_allocator_capture cri(stack.result);
9008
9009 if (comp(l, string_value(*ri, stack.result))) return true;
9010 }
9011
9012 return false;
9013 }
9014 }
9015
9016 assert(false && "Wrong types");
9017 return false;
9018 }
9019
9020 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) {
9021 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all
9022 : eval == nodeset_eval_any;
9023 }
9024
9025 template <class Comp>
9026 static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs,
9027 const xpath_context& c, const xpath_stack& stack,
9028 const Comp& comp) {
9029 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9030
9031 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9032 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9033 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) {
9034 xpath_allocator_capture cr(stack.result);
9035
9036 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9037 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9038
9039 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
9040 xpath_allocator_capture cri(stack.result);
9041
9042 double l =
9043 convert_string_to_number(string_value(*li, stack.result).c_str());
9044
9045 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
9046 xpath_allocator_capture crii(stack.result);
9047
9048 if (comp(l, convert_string_to_number(
9049 string_value(*ri, stack.result).c_str())))
9050 return true;
9051 }
9052 }
9053
9054 return false;
9055 } else if (lt != xpath_type_node_set && rt == xpath_type_node_set) {
9056 xpath_allocator_capture cr(stack.result);
9057
9058 double l = lhs->eval_number(c, stack);
9059 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9060
9061 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) {
9062 xpath_allocator_capture cri(stack.result);
9063
9064 if (comp(l, convert_string_to_number(
9065 string_value(*ri, stack.result).c_str())))
9066 return true;
9067 }
9068
9069 return false;
9070 } else if (lt == xpath_type_node_set && rt != xpath_type_node_set) {
9071 xpath_allocator_capture cr(stack.result);
9072
9073 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9074 double r = rhs->eval_number(c, stack);
9075
9076 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) {
9077 xpath_allocator_capture cri(stack.result);
9078
9079 if (comp(convert_string_to_number(
9080 string_value(*li, stack.result).c_str()),
9081 r))
9082 return true;
9083 }
9084
9085 return false;
9086 } else {
9087 assert(false && "Wrong types");
9088 return false;
9089 }
9090 }
9091
9092 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first,
9093 xpath_ast_node* expr,
9094 const xpath_stack& stack, bool once) {
9095 assert(ns.size() >= first);
9096 assert(expr->rettype() != xpath_type_number);
9097
9098 size_t i = 1;
9099 size_t size = ns.size() - first;
9100
9101 xpath_node* last = ns.begin() + first;
9102
9103 // remove_if... or well, sort of
9104 for (xpath_node* it = last; it != ns.end(); ++it, ++i) {
9105 xpath_context c(*it, i, size);
9106
9107 if (expr->eval_boolean(c, stack)) {
9108 *last++ = *it;
9109
9110 if (once) break;
9111 }
9112 }
9113
9114 ns.truncate(last);
9115 }
9116
9117 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first,
9118 xpath_ast_node* expr,
9119 const xpath_stack& stack, bool once) {
9120 assert(ns.size() >= first);
9121 assert(expr->rettype() == xpath_type_number);
9122
9123 size_t i = 1;
9124 size_t size = ns.size() - first;
9125
9126 xpath_node* last = ns.begin() + first;
9127
9128 // remove_if... or well, sort of
9129 for (xpath_node* it = last; it != ns.end(); ++it, ++i) {
9130 xpath_context c(*it, i, size);
9131
9132 if (expr->eval_number(c, stack) == i) {
9133 *last++ = *it;
9134
9135 if (once) break;
9136 }
9137 }
9138
9139 ns.truncate(last);
9140 }
9141
9142 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first,
9143 xpath_ast_node* expr,
9144 const xpath_stack& stack) {
9145 assert(ns.size() >= first);
9146 assert(expr->rettype() == xpath_type_number);
9147
9148 size_t size = ns.size() - first;
9149
9150 xpath_node* last = ns.begin() + first;
9151
9152 xpath_context c(xpath_node(), 1, size);
9153
9154 double er = expr->eval_number(c, stack);
9155
9156 if (er >= 1.0 && er <= size) {
9157 size_t eri = static_cast<size_t>(er);
9158
9159 if (er == eri) {
9160 xpath_node r = last[eri - 1];
9161
9162 *last++ = r;
9163 }
9164 }
9165
9166 ns.truncate(last);
9167 }
9168
9169 void apply_predicate(xpath_node_set_raw& ns, size_t first,
9170 const xpath_stack& stack, bool once) {
9171 if (ns.size() == first) return;
9172
9173 assert(_type == ast_filter || _type == ast_predicate);
9174
9175 if (_test == predicate_constant || _test == predicate_constant_one)
9176 apply_predicate_number_const(ns, first, _right, stack);
9177 else if (_right->rettype() == xpath_type_number)
9178 apply_predicate_number(ns, first, _right, stack, once);
9179 else
9180 apply_predicate_boolean(ns, first, _right, stack, once);
9181 }
9182
9183 void apply_predicates(xpath_node_set_raw& ns, size_t first,
9184 const xpath_stack& stack, nodeset_eval_t eval) {
9185 if (ns.size() == first) return;
9186
9187 bool last_once = eval_once(ns.type(), eval);
9188
9189 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9190 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9191 }
9192
9193 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a,
9194 xml_node_struct* parent, xpath_allocator* alloc) {
9195 assert(a);
9196
9197 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9198
9199 switch (_test) {
9200 case nodetest_name:
9201 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) {
9202 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9203 return true;
9204 }
9205 break;
9206
9207 case nodetest_type_node:
9208 case nodetest_all:
9209 if (is_xpath_attribute(name)) {
9210 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9211 return true;
9212 }
9213 break;
9214
9215 case nodetest_all_in_namespace:
9216 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) {
9217 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9218 return true;
9219 }
9220 break;
9221
9222 default:;
9223 }
9224
9225 return false;
9226 }
9227
9228 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n,
9229 xpath_allocator* alloc) {
9230 assert(n);
9231
9232 xml_node_type type = PUGI__NODETYPE(n);
9233
9234 switch (_test) {
9235 case nodetest_name:
9236 if (type == node_element && n->name &&
9237 strequal(n->name, _data.nodetest)) {
9238 ns.push_back(xml_node(n), alloc);
9239 return true;
9240 }
9241 break;
9242
9243 case nodetest_type_node:
9244 ns.push_back(xml_node(n), alloc);
9245 return true;
9246
9247 case nodetest_type_comment:
9248 if (type == node_comment) {
9249 ns.push_back(xml_node(n), alloc);
9250 return true;
9251 }
9252 break;
9253
9254 case nodetest_type_text:
9255 if (type == node_pcdata || type == node_cdata) {
9256 ns.push_back(xml_node(n), alloc);
9257 return true;
9258 }
9259 break;
9260
9261 case nodetest_type_pi:
9262 if (type == node_pi) {
9263 ns.push_back(xml_node(n), alloc);
9264 return true;
9265 }
9266 break;
9267
9268 case nodetest_pi:
9269 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) {
9270 ns.push_back(xml_node(n), alloc);
9271 return true;
9272 }
9273 break;
9274
9275 case nodetest_all:
9276 if (type == node_element) {
9277 ns.push_back(xml_node(n), alloc);
9278 return true;
9279 }
9280 break;
9281
9282 case nodetest_all_in_namespace:
9283 if (type == node_element && n->name &&
9284 starts_with(n->name, _data.nodetest)) {
9285 ns.push_back(xml_node(n), alloc);
9286 return true;
9287 }
9288 break;
9289
9290 default:
9291 assert(false && "Unknown axis");
9292 }
9293
9294 return false;
9295 }
9296
9297 template <class T>
9298 void step_fill(xpath_node_set_raw& ns, xml_node_struct* n,
9299 xpath_allocator* alloc, bool once, T) {
9300 const axis_t axis = T::axis;
9301
9302 switch (axis) {
9303 case axis_attribute: {
9304 for (xml_attribute_struct* a = n->first_attribute; a;
9305 a = a->next_attribute)
9306 if (step_push(ns, a, n, alloc) & once) return;
9307
9308 break;
9309 }
9310
9311 case axis_child: {
9312 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
9313 if (step_push(ns, c, alloc) & once) return;
9314
9315 break;
9316 }
9317
9318 case axis_descendant:
9319 case axis_descendant_or_self: {
9320 if (axis == axis_descendant_or_self)
9321 if (step_push(ns, n, alloc) & once) return;
9322
9323 xml_node_struct* cur = n->first_child;
9324
9325 while (cur) {
9326 if (step_push(ns, cur, alloc) & once) return;
9327
9328 if (cur->first_child)
9329 cur = cur->first_child;
9330 else {
9331 while (!cur->next_sibling) {
9332 cur = cur->parent;
9333
9334 if (cur == n) return;
9335 }
9336
9337 cur = cur->next_sibling;
9338 }
9339 }
9340
9341 break;
9342 }
9343
9344 case axis_following_sibling: {
9345 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
9346 if (step_push(ns, c, alloc) & once) return;
9347
9348 break;
9349 }
9350
9351 case axis_preceding_sibling: {
9352 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling;
9353 c = c->prev_sibling_c)
9354 if (step_push(ns, c, alloc) & once) return;
9355
9356 break;
9357 }
9358
9359 case axis_following: {
9360 xml_node_struct* cur = n;
9361
9362 // exit from this node so that we don't include descendants
9363 while (!cur->next_sibling) {
9364 cur = cur->parent;
9365
9366 if (!cur) return;
9367 }
9368
9369 cur = cur->next_sibling;
9370
9371 while (cur) {
9372 if (step_push(ns, cur, alloc) & once) return;
9373
9374 if (cur->first_child)
9375 cur = cur->first_child;
9376 else {
9377 while (!cur->next_sibling) {
9378 cur = cur->parent;
9379
9380 if (!cur) return;
9381 }
9382
9383 cur = cur->next_sibling;
9384 }
9385 }
9386
9387 break;
9388 }
9389
9390 case axis_preceding: {
9391 xml_node_struct* cur = n;
9392
9393 // exit from this node so that we don't include descendants
9394 while (!cur->prev_sibling_c->next_sibling) {
9395 cur = cur->parent;
9396
9397 if (!cur) return;
9398 }
9399
9400 cur = cur->prev_sibling_c;
9401
9402 while (cur) {
9403 if (cur->first_child)
9404 cur = cur->first_child->prev_sibling_c;
9405 else {
9406 // leaf node, can't be ancestor
9407 if (step_push(ns, cur, alloc) & once) return;
9408
9409 while (!cur->prev_sibling_c->next_sibling) {
9410 cur = cur->parent;
9411
9412 if (!cur) return;
9413
9414 if (!node_is_ancestor(cur, n))
9415 if (step_push(ns, cur, alloc) & once) return;
9416 }
9417
9418 cur = cur->prev_sibling_c;
9419 }
9420 }
9421
9422 break;
9423 }
9424
9425 case axis_ancestor:
9426 case axis_ancestor_or_self: {
9427 if (axis == axis_ancestor_or_self)
9428 if (step_push(ns, n, alloc) & once) return;
9429
9430 xml_node_struct* cur = n->parent;
9431
9432 while (cur) {
9433 if (step_push(ns, cur, alloc) & once) return;
9434
9435 cur = cur->parent;
9436 }
9437
9438 break;
9439 }
9440
9441 case axis_self: {
9442 step_push(ns, n, alloc);
9443
9444 break;
9445 }
9446
9447 case axis_parent: {
9448 if (n->parent) step_push(ns, n->parent, alloc);
9449
9450 break;
9451 }
9452
9453 default:
9454 assert(false && "Unimplemented axis");
9455 }
9456 }
9457
9458 template <class T>
9459 void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a,
9460 xml_node_struct* p, xpath_allocator* alloc, bool once, T v) {
9461 const axis_t axis = T::axis;
9462
9463 switch (axis) {
9464 case axis_ancestor:
9465 case axis_ancestor_or_self: {
9466 if (axis == axis_ancestor_or_self &&
9467 _test == nodetest_type_node) // reject attributes based on
9468 // principal node type test
9469 if (step_push(ns, a, p, alloc) & once) return;
9470
9471 xml_node_struct* cur = p;
9472
9473 while (cur) {
9474 if (step_push(ns, cur, alloc) & once) return;
9475
9476 cur = cur->parent;
9477 }
9478
9479 break;
9480 }
9481
9482 case axis_descendant_or_self:
9483 case axis_self: {
9484 if (_test == nodetest_type_node) // reject attributes based on
9485 // principal node type test
9486 step_push(ns, a, p, alloc);
9487
9488 break;
9489 }
9490
9491 case axis_following: {
9492 xml_node_struct* cur = p;
9493
9494 while (cur) {
9495 if (cur->first_child)
9496 cur = cur->first_child;
9497 else {
9498 while (!cur->next_sibling) {
9499 cur = cur->parent;
9500
9501 if (!cur) return;
9502 }
9503
9504 cur = cur->next_sibling;
9505 }
9506
9507 if (step_push(ns, cur, alloc) & once) return;
9508 }
9509
9510 break;
9511 }
9512
9513 case axis_parent: {
9514 step_push(ns, p, alloc);
9515
9516 break;
9517 }
9518
9519 case axis_preceding: {
9520 // preceding:: axis does not include attribute nodes and attribute
9521 // ancestors (they are the same as parent's ancestors), so we can reuse
9522 // node preceding
9523 step_fill(ns, p, alloc, once, v);
9524 break;
9525 }
9526
9527 default:
9528 assert(false && "Unimplemented axis");
9529 }
9530 }
9531
9532 template <class T>
9533 void step_fill(xpath_node_set_raw& ns, const xpath_node& xn,
9534 xpath_allocator* alloc, bool once, T v) {
9535 const axis_t axis = T::axis;
9536 const bool axis_has_attributes =
9537 (axis == axis_ancestor || axis == axis_ancestor_or_self ||
9538 axis == axis_descendant_or_self || axis == axis_following ||
9539 axis == axis_parent || axis == axis_preceding || axis == axis_self);
9540
9541 if (xn.node())
9542 step_fill(ns, xn.node().internal_object(), alloc, once, v);
9543 else if (axis_has_attributes && xn.attribute() && xn.parent())
9544 step_fill(ns, xn.attribute().internal_object(),
9545 xn.parent().internal_object(), alloc, once, v);
9546 }
9547
9548 template <class T>
9549 xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack,
9550 nodeset_eval_t eval, T v) {
9551 const axis_t axis = T::axis;
9552 const bool axis_reverse =
9553 (axis == axis_ancestor || axis == axis_ancestor_or_self ||
9554 axis == axis_preceding || axis == axis_preceding_sibling);
9555 const xpath_node_set::type_t axis_type =
9556 axis_reverse ? xpath_node_set::type_sorted_reverse
9557 : xpath_node_set::type_sorted;
9558
9559 bool once =
9560 (axis == axis_attribute && _test == nodetest_name) ||
9561 (!_right && eval_once(axis_type, eval)) ||
9562 (_right && !_right->_next && _right->_test == predicate_constant_one);
9563
9565 ns.set_type(axis_type);
9566
9567 if (_left) {
9568 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
9569
9570 // self axis preserves the original order
9571 if (axis == axis_self) ns.set_type(s.type());
9572
9573 for (const xpath_node* it = s.begin(); it != s.end(); ++it) {
9574 size_t size = ns.size();
9575
9576 // in general, all axes generate elements in a particular order, but
9577 // there is no order guarantee if axis is applied to two nodes
9578 if (axis != axis_self && size != 0)
9579 ns.set_type(xpath_node_set::type_unsorted);
9580
9581 step_fill(ns, *it, stack.result, once, v);
9582 if (_right) apply_predicates(ns, size, stack, eval);
9583 }
9584 } else {
9585 step_fill(ns, c.n, stack.result, once, v);
9586 if (_right) apply_predicates(ns, 0, stack, eval);
9587 }
9588
9589 // child, attribute and self axes always generate unique set of nodes
9590 // for other axis, if the set stayed sorted, it stayed unique because the
9591 // traversal algorithms do not visit the same node twice
9592 if (axis != axis_child && axis != axis_attribute && axis != axis_self &&
9593 ns.type() == xpath_node_set::type_unsorted)
9594 ns.remove_duplicates();
9595
9596 return ns;
9597 }
9598
9599public:
9600 xpath_ast_node(ast_type_t type, xpath_value_type rettype_,
9601 const char_t* value)
9602 : _type(static_cast<char>(type)),
9603 _rettype(static_cast<char>(rettype_)),
9604 _axis(0),
9605 _test(0),
9606 _left(0),
9607 _right(0),
9608 _next(0) {
9609 assert(type == ast_string_constant);
9610 _data.string = value;
9611 }
9612
9613 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value)
9614 : _type(static_cast<char>(type)),
9615 _rettype(static_cast<char>(rettype_)),
9616 _axis(0),
9617 _test(0),
9618 _left(0),
9619 _right(0),
9620 _next(0) {
9621 assert(type == ast_number_constant);
9622 _data.number = value;
9623 }
9624
9625 xpath_ast_node(ast_type_t type, xpath_value_type rettype_,
9626 xpath_variable* value)
9627 : _type(static_cast<char>(type)),
9628 _rettype(static_cast<char>(rettype_)),
9629 _axis(0),
9630 _test(0),
9631 _left(0),
9632 _right(0),
9633 _next(0) {
9634 assert(type == ast_variable);
9635 _data.variable = value;
9636 }
9637
9638 xpath_ast_node(ast_type_t type, xpath_value_type rettype_,
9639 xpath_ast_node* left = 0, xpath_ast_node* right = 0)
9640 : _type(static_cast<char>(type)),
9641 _rettype(static_cast<char>(rettype_)),
9642 _axis(0),
9643 _test(0),
9644 _left(left),
9645 _right(right),
9646 _next(0) {}
9647
9648 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis,
9649 nodetest_t test, const char_t* contents)
9650 : _type(static_cast<char>(type)),
9651 _rettype(xpath_type_node_set),
9652 _axis(static_cast<char>(axis)),
9653 _test(static_cast<char>(test)),
9654 _left(left),
9655 _right(0),
9656 _next(0) {
9657 assert(type == ast_step);
9658 _data.nodetest = contents;
9659 }
9660
9661 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right,
9662 predicate_t test)
9663 : _type(static_cast<char>(type)),
9664 _rettype(xpath_type_node_set),
9665 _axis(0),
9666 _test(static_cast<char>(test)),
9667 _left(left),
9668 _right(right),
9669 _next(0) {
9670 assert(type == ast_filter || type == ast_predicate);
9671 }
9672
9673 void set_next(xpath_ast_node* value) { _next = value; }
9674
9675 void set_right(xpath_ast_node* value) { _right = value; }
9676
9677 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) {
9678 switch (_type) {
9679 case ast_op_or:
9680 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
9681
9682 case ast_op_and:
9683 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
9684
9685 case ast_op_equal:
9686 return compare_eq(_left, _right, c, stack, equal_to());
9687
9688 case ast_op_not_equal:
9689 return compare_eq(_left, _right, c, stack, not_equal_to());
9690
9691 case ast_op_less:
9692 return compare_rel(_left, _right, c, stack, less());
9693
9694 case ast_op_greater:
9695 return compare_rel(_right, _left, c, stack, less());
9696
9697 case ast_op_less_or_equal:
9698 return compare_rel(_left, _right, c, stack, less_equal());
9699
9700 case ast_op_greater_or_equal:
9701 return compare_rel(_right, _left, c, stack, less_equal());
9702
9703 case ast_func_starts_with: {
9704 xpath_allocator_capture cr(stack.result);
9705
9706 xpath_string lr = _left->eval_string(c, stack);
9707 xpath_string rr = _right->eval_string(c, stack);
9708
9709 return starts_with(lr.c_str(), rr.c_str());
9710 }
9711
9712 case ast_func_contains: {
9713 xpath_allocator_capture cr(stack.result);
9714
9715 xpath_string lr = _left->eval_string(c, stack);
9716 xpath_string rr = _right->eval_string(c, stack);
9717
9718 return find_substring(lr.c_str(), rr.c_str()) != 0;
9719 }
9720
9721 case ast_func_boolean:
9722 return _left->eval_boolean(c, stack);
9723
9724 case ast_func_not:
9725 return !_left->eval_boolean(c, stack);
9726
9727 case ast_func_true:
9728 return true;
9729
9730 case ast_func_false:
9731 return false;
9732
9733 case ast_func_lang: {
9734 if (c.n.attribute()) return false;
9735
9736 xpath_allocator_capture cr(stack.result);
9737
9738 xpath_string lang = _left->eval_string(c, stack);
9739
9740 for (xml_node n = c.n.node(); n; n = n.parent()) {
9741 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
9742
9743 if (a) {
9744 const char_t* value = a.value();
9745
9746 // strnicmp / strncasecmp is not portable
9747 for (const char_t* lit = lang.c_str(); *lit; ++lit) {
9748 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
9749 ++value;
9750 }
9751
9752 return *value == 0 || *value == '-';
9753 }
9754 }
9755
9756 return false;
9757 }
9758
9759 case ast_opt_compare_attribute: {
9760 const char_t* value = (_right->_type == ast_string_constant)
9761 ? _right->_data.string
9762 : _right->_data.variable->get_string();
9763
9764 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
9765
9766 return attr && strequal(attr.value(), value) &&
9767 is_xpath_attribute(attr.name());
9768 }
9769
9770 case ast_variable: {
9771 assert(_rettype == _data.variable->type());
9772
9773 if (_rettype == xpath_type_boolean)
9774 return _data.variable->get_boolean();
9775 }
9776 // FALL THROUGH
9777
9778 default: {
9779 switch (_rettype) {
9780 case xpath_type_number:
9781 return convert_number_to_boolean(eval_number(c, stack));
9782
9783 case xpath_type_string: {
9784 xpath_allocator_capture cr(stack.result);
9785
9786 return !eval_string(c, stack).empty();
9787 }
9788
9789 case xpath_type_node_set: {
9790 xpath_allocator_capture cr(stack.result);
9791
9792 return !eval_node_set(c, stack, nodeset_eval_any).empty();
9793 }
9794
9795 default:
9796 assert(false && "Wrong expression for return type boolean");
9797 return false;
9798 }
9799 }
9800 }
9801 }
9802
9803 double eval_number(const xpath_context& c, const xpath_stack& stack) {
9804 switch (_type) {
9805 case ast_op_add:
9806 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
9807
9808 case ast_op_subtract:
9809 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
9810
9811 case ast_op_multiply:
9812 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
9813
9814 case ast_op_divide:
9815 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
9816
9817 case ast_op_mod:
9818 return fmod(_left->eval_number(c, stack),
9819 _right->eval_number(c, stack));
9820
9821 case ast_op_negate:
9822 return -_left->eval_number(c, stack);
9823
9824 case ast_number_constant:
9825 return _data.number;
9826
9827 case ast_func_last:
9828 return static_cast<double>(c.size);
9829
9830 case ast_func_position:
9831 return static_cast<double>(c.position);
9832
9833 case ast_func_count: {
9834 xpath_allocator_capture cr(stack.result);
9835
9836 return static_cast<double>(
9837 _left->eval_node_set(c, stack, nodeset_eval_all).size());
9838 }
9839
9840 case ast_func_string_length_0: {
9841 xpath_allocator_capture cr(stack.result);
9842
9843 return static_cast<double>(string_value(c.n, stack.result).length());
9844 }
9845
9846 case ast_func_string_length_1: {
9847 xpath_allocator_capture cr(stack.result);
9848
9849 return static_cast<double>(_left->eval_string(c, stack).length());
9850 }
9851
9852 case ast_func_number_0: {
9853 xpath_allocator_capture cr(stack.result);
9854
9855 return convert_string_to_number(
9856 string_value(c.n, stack.result).c_str());
9857 }
9858
9859 case ast_func_number_1:
9860 return _left->eval_number(c, stack);
9861
9862 case ast_func_sum: {
9863 xpath_allocator_capture cr(stack.result);
9864
9865 double r = 0;
9866
9868 _left->eval_node_set(c, stack, nodeset_eval_all);
9869
9870 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) {
9871 xpath_allocator_capture cri(stack.result);
9872
9873 r +=
9874 convert_string_to_number(string_value(*it, stack.result).c_str());
9875 }
9876
9877 return r;
9878 }
9879
9880 case ast_func_floor: {
9881 double r = _left->eval_number(c, stack);
9882
9883 return r == r ? floor(r) : r;
9884 }
9885
9886 case ast_func_ceiling: {
9887 double r = _left->eval_number(c, stack);
9888
9889 return r == r ? ceil(r) : r;
9890 }
9891
9892 case ast_func_round:
9893 return round_nearest_nzero(_left->eval_number(c, stack));
9894
9895 case ast_variable: {
9896 assert(_rettype == _data.variable->type());
9897
9898 if (_rettype == xpath_type_number) return _data.variable->get_number();
9899 }
9900 // FALL THROUGH
9901
9902 default: {
9903 switch (_rettype) {
9904 case xpath_type_boolean:
9905 return eval_boolean(c, stack) ? 1 : 0;
9906
9907 case xpath_type_string: {
9908 xpath_allocator_capture cr(stack.result);
9909
9910 return convert_string_to_number(eval_string(c, stack).c_str());
9911 }
9912
9913 case xpath_type_node_set: {
9914 xpath_allocator_capture cr(stack.result);
9915
9916 return convert_string_to_number(eval_string(c, stack).c_str());
9917 }
9918
9919 default:
9920 assert(false && "Wrong expression for return type number");
9921 return 0;
9922 }
9923 }
9924 }
9925 }
9926
9927 xpath_string eval_string_concat(const xpath_context& c,
9928 const xpath_stack& stack) {
9929 assert(_type == ast_func_concat);
9930
9931 xpath_allocator_capture ct(stack.temp);
9932
9933 // count the string number
9934 size_t count = 1;
9935 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
9936
9937 // gather all strings
9938 xpath_string static_buffer[4];
9939 xpath_string* buffer = static_buffer;
9940
9941 // allocate on-heap for large concats
9942 if (count > sizeof(static_buffer) / sizeof(static_buffer[0])) {
9943 buffer = static_cast<xpath_string*>(
9944 stack.temp->allocate(count * sizeof(xpath_string)));
9945 assert(buffer);
9946 }
9947
9948 // evaluate all strings to temporary stack
9949 xpath_stack swapped_stack = {stack.temp, stack.result};
9950
9951 buffer[0] = _left->eval_string(c, swapped_stack);
9952
9953 size_t pos = 1;
9954 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos)
9955 buffer[pos] = n->eval_string(c, swapped_stack);
9956 assert(pos == count);
9957
9958 // get total length
9959 size_t length = 0;
9960 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
9961
9962 // create final string
9963 char_t* result = static_cast<char_t*>(
9964 stack.result->allocate((length + 1) * sizeof(char_t)));
9965 assert(result);
9966
9967 char_t* ri = result;
9968
9969 for (size_t j = 0; j < count; ++j)
9970 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) *ri++ = *bi;
9971
9972 *ri = 0;
9973
9974 return xpath_string::from_heap_preallocated(result, ri);
9975 }
9976
9977 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) {
9978 switch (_type) {
9979 case ast_string_constant:
9980 return xpath_string::from_const(_data.string);
9981
9982 case ast_func_local_name_0: {
9983 xpath_node na = c.n;
9984
9985 return xpath_string::from_const(local_name(na));
9986 }
9987
9988 case ast_func_local_name_1: {
9989 xpath_allocator_capture cr(stack.result);
9990
9992 _left->eval_node_set(c, stack, nodeset_eval_first);
9993 xpath_node na = ns.first();
9994
9995 return xpath_string::from_const(local_name(na));
9996 }
9997
9998 case ast_func_name_0: {
9999 xpath_node na = c.n;
10000
10001 return xpath_string::from_const(qualified_name(na));
10002 }
10003
10004 case ast_func_name_1: {
10005 xpath_allocator_capture cr(stack.result);
10006
10008 _left->eval_node_set(c, stack, nodeset_eval_first);
10009 xpath_node na = ns.first();
10010
10011 return xpath_string::from_const(qualified_name(na));
10012 }
10013
10014 case ast_func_namespace_uri_0: {
10015 xpath_node na = c.n;
10016
10017 return xpath_string::from_const(namespace_uri(na));
10018 }
10019
10020 case ast_func_namespace_uri_1: {
10021 xpath_allocator_capture cr(stack.result);
10022
10024 _left->eval_node_set(c, stack, nodeset_eval_first);
10025 xpath_node na = ns.first();
10026
10027 return xpath_string::from_const(namespace_uri(na));
10028 }
10029
10030 case ast_func_string_0:
10031 return string_value(c.n, stack.result);
10032
10033 case ast_func_string_1:
10034 return _left->eval_string(c, stack);
10035
10036 case ast_func_concat:
10037 return eval_string_concat(c, stack);
10038
10039 case ast_func_substring_before: {
10040 xpath_allocator_capture cr(stack.temp);
10041
10042 xpath_stack swapped_stack = {stack.temp, stack.result};
10043
10044 xpath_string s = _left->eval_string(c, swapped_stack);
10045 xpath_string p = _right->eval_string(c, swapped_stack);
10046
10047 const char_t* pos = find_substring(s.c_str(), p.c_str());
10048
10049 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result)
10050 : xpath_string();
10051 }
10052
10053 case ast_func_substring_after: {
10054 xpath_allocator_capture cr(stack.temp);
10055
10056 xpath_stack swapped_stack = {stack.temp, stack.result};
10057
10058 xpath_string s = _left->eval_string(c, swapped_stack);
10059 xpath_string p = _right->eval_string(c, swapped_stack);
10060
10061 const char_t* pos = find_substring(s.c_str(), p.c_str());
10062 if (!pos) return xpath_string();
10063
10064 const char_t* rbegin = pos + p.length();
10065 const char_t* rend = s.c_str() + s.length();
10066
10067 return s.uses_heap()
10068 ? xpath_string::from_heap(rbegin, rend, stack.result)
10069 : xpath_string::from_const(rbegin);
10070 }
10071
10072 case ast_func_substring_2: {
10073 xpath_allocator_capture cr(stack.temp);
10074
10075 xpath_stack swapped_stack = {stack.temp, stack.result};
10076
10077 xpath_string s = _left->eval_string(c, swapped_stack);
10078 size_t s_length = s.length();
10079
10080 double first = round_nearest(_right->eval_number(c, stack));
10081
10082 if (is_nan(first))
10083 return xpath_string(); // NaN
10084 else if (first >= s_length + 1)
10085 return xpath_string();
10086
10087 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10088 assert(1 <= pos && pos <= s_length + 1);
10089
10090 const char_t* rbegin = s.c_str() + (pos - 1);
10091 const char_t* rend = s.c_str() + s.length();
10092
10093 return s.uses_heap()
10094 ? xpath_string::from_heap(rbegin, rend, stack.result)
10095 : xpath_string::from_const(rbegin);
10096 }
10097
10098 case ast_func_substring_3: {
10099 xpath_allocator_capture cr(stack.temp);
10100
10101 xpath_stack swapped_stack = {stack.temp, stack.result};
10102
10103 xpath_string s = _left->eval_string(c, swapped_stack);
10104 size_t s_length = s.length();
10105
10106 double first = round_nearest(_right->eval_number(c, stack));
10107 double last =
10108 first + round_nearest(_right->_next->eval_number(c, stack));
10109
10110 if (is_nan(first) || is_nan(last))
10111 return xpath_string();
10112 else if (first >= s_length + 1)
10113 return xpath_string();
10114 else if (first >= last)
10115 return xpath_string();
10116 else if (last < 1)
10117 return xpath_string();
10118
10119 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10120 size_t end =
10121 last >= s_length + 1 ? s_length + 1 : static_cast<size_t>(last);
10122
10123 assert(1 <= pos && pos <= end && end <= s_length + 1);
10124 const char_t* rbegin = s.c_str() + (pos - 1);
10125 const char_t* rend = s.c_str() + (end - 1);
10126
10127 return (end == s_length + 1 && !s.uses_heap())
10128 ? xpath_string::from_const(rbegin)
10129 : xpath_string::from_heap(rbegin, rend, stack.result);
10130 }
10131
10132 case ast_func_normalize_space_0: {
10133 xpath_string s = string_value(c.n, stack.result);
10134
10135 char_t* begin = s.data(stack.result);
10136 char_t* end = normalize_space(begin);
10137
10138 return xpath_string::from_heap_preallocated(begin, end);
10139 }
10140
10141 case ast_func_normalize_space_1: {
10142 xpath_string s = _left->eval_string(c, stack);
10143
10144 char_t* begin = s.data(stack.result);
10145 char_t* end = normalize_space(begin);
10146
10147 return xpath_string::from_heap_preallocated(begin, end);
10148 }
10149
10150 case ast_func_translate: {
10151 xpath_allocator_capture cr(stack.temp);
10152
10153 xpath_stack swapped_stack = {stack.temp, stack.result};
10154
10155 xpath_string s = _left->eval_string(c, stack);
10156 xpath_string from = _right->eval_string(c, swapped_stack);
10157 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10158
10159 char_t* begin = s.data(stack.result);
10160 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10161
10162 return xpath_string::from_heap_preallocated(begin, end);
10163 }
10164
10165 case ast_opt_translate_table: {
10166 xpath_string s = _left->eval_string(c, stack);
10167
10168 char_t* begin = s.data(stack.result);
10169 char_t* end = translate_table(begin, _data.table);
10170
10171 return xpath_string::from_heap_preallocated(begin, end);
10172 }
10173
10174 case ast_variable: {
10175 assert(_rettype == _data.variable->type());
10176
10177 if (_rettype == xpath_type_string)
10178 return xpath_string::from_const(_data.variable->get_string());
10179 }
10180 // FALL THROUGH
10181
10182 default: {
10183 switch (_rettype) {
10184 case xpath_type_boolean:
10185 return xpath_string::from_const(eval_boolean(c, stack)
10186 ? PUGIXML_TEXT("true")
10187 : PUGIXML_TEXT("false"));
10188
10189 case xpath_type_number:
10190 return convert_number_to_string(eval_number(c, stack),
10191 stack.result);
10192
10193 case xpath_type_node_set: {
10194 xpath_allocator_capture cr(stack.temp);
10195
10196 xpath_stack swapped_stack = {stack.temp, stack.result};
10197
10199 eval_node_set(c, swapped_stack, nodeset_eval_first);
10200 return ns.empty() ? xpath_string()
10201 : string_value(ns.first(), stack.result);
10202 }
10203
10204 default:
10205 assert(false && "Wrong expression for return type string");
10206 return xpath_string();
10207 }
10208 }
10209 }
10210 }
10211
10212 xpath_node_set_raw eval_node_set(const xpath_context& c,
10213 const xpath_stack& stack,
10214 nodeset_eval_t eval) {
10215 switch (_type) {
10216 case ast_op_union: {
10217 xpath_allocator_capture cr(stack.temp);
10218
10219 xpath_stack swapped_stack = {stack.temp, stack.result};
10220
10221 xpath_node_set_raw ls = _left->eval_node_set(c, swapped_stack, eval);
10222 xpath_node_set_raw rs = _right->eval_node_set(c, stack, eval);
10223
10224 // we can optimize merging two sorted sets, but this is a very rare
10225 // operation, so don't bother
10226 rs.set_type(xpath_node_set::type_unsorted);
10227
10228 rs.append(ls.begin(), ls.end(), stack.result);
10229 rs.remove_duplicates();
10230
10231 return rs;
10232 }
10233
10234 case ast_filter: {
10235 xpath_node_set_raw set = _left->eval_node_set(
10236 c, stack,
10237 _test == predicate_constant_one ? nodeset_eval_first
10238 : nodeset_eval_all);
10239
10240 // either expression is a number or it contains position() call; sort by
10241 // document order
10242 if (_test != predicate_posinv) set.sort_do();
10243
10244 bool once = eval_once(set.type(), eval);
10245
10246 apply_predicate(set, 0, stack, once);
10247
10248 return set;
10249 }
10250
10251 case ast_func_id:
10252 return xpath_node_set_raw();
10253
10254 case ast_step: {
10255 switch (_axis) {
10256 case axis_ancestor:
10257 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
10258
10259 case axis_ancestor_or_self:
10260 return step_do(c, stack, eval,
10262
10263 case axis_attribute:
10264 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
10265
10266 case axis_child:
10267 return step_do(c, stack, eval, axis_to_type<axis_child>());
10268
10269 case axis_descendant:
10270 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
10271
10272 case axis_descendant_or_self:
10273 return step_do(c, stack, eval,
10275
10276 case axis_following:
10277 return step_do(c, stack, eval, axis_to_type<axis_following>());
10278
10279 case axis_following_sibling:
10280 return step_do(c, stack, eval,
10282
10283 case axis_namespace:
10284 // namespaced axis is not supported
10285 return xpath_node_set_raw();
10286
10287 case axis_parent:
10288 return step_do(c, stack, eval, axis_to_type<axis_parent>());
10289
10290 case axis_preceding:
10291 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
10292
10293 case axis_preceding_sibling:
10294 return step_do(c, stack, eval,
10296
10297 case axis_self:
10298 return step_do(c, stack, eval, axis_to_type<axis_self>());
10299
10300 default:
10301 assert(false && "Unknown axis");
10302 return xpath_node_set_raw();
10303 }
10304 }
10305
10306 case ast_step_root: {
10307 assert(!_right); // root step can't have any predicates
10308
10310
10311 ns.set_type(xpath_node_set::type_sorted);
10312
10313 if (c.n.node())
10314 ns.push_back(c.n.node().root(), stack.result);
10315 else if (c.n.attribute())
10316 ns.push_back(c.n.parent().root(), stack.result);
10317
10318 return ns;
10319 }
10320
10321 case ast_variable: {
10322 assert(_rettype == _data.variable->type());
10323
10324 if (_rettype == xpath_type_node_set) {
10325 const xpath_node_set& s = _data.variable->get_node_set();
10326
10328
10329 ns.set_type(s.type());
10330 ns.append(s.begin(), s.end(), stack.result);
10331
10332 return ns;
10333 }
10334 }
10335 // FALL THROUGH
10336
10337 default:
10338 assert(false && "Wrong expression for return type node set");
10339 return xpath_node_set_raw();
10340 }
10341 }
10342
10343 void optimize(xpath_allocator* alloc) {
10344 if (_left) _left->optimize(alloc);
10345
10346 if (_right) _right->optimize(alloc);
10347
10348 if (_next) _next->optimize(alloc);
10349
10350 optimize_self(alloc);
10351 }
10352
10353 void optimize_self(xpath_allocator* alloc) {
10354 // Rewrite [position()=expr] with [expr]
10355 // Note that this step has to go before classification to recognize
10356 // [position()=1]
10357 if ((_type == ast_filter || _type == ast_predicate) &&
10358 _right->_type == ast_op_equal &&
10359 _right->_left->_type == ast_func_position &&
10360 _right->_right->_rettype == xpath_type_number) {
10361 _right = _right->_right;
10362 }
10363
10364 // Classify filter/predicate ops to perform various optimizations during
10365 // evaluation
10366 if (_type == ast_filter || _type == ast_predicate) {
10367 assert(_test == predicate_default);
10368
10369 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
10370 _test = predicate_constant_one;
10371 else if (_right->_rettype == xpath_type_number &&
10372 (_right->_type == ast_number_constant ||
10373 _right->_type == ast_variable ||
10374 _right->_type == ast_func_last))
10375 _test = predicate_constant;
10376 else if (_right->_rettype != xpath_type_number &&
10377 _right->is_posinv_expr())
10378 _test = predicate_posinv;
10379 }
10380
10381 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
10382 // The former is a full form of //foo, the latter is much faster since it
10383 // executes the node test immediately Do a similar kind of rewrite for
10384 // self/descendant/descendant-or-self axes Note that we only rewrite
10385 // positionally invariant steps (//foo[1] != /descendant::foo[1])
10386 if (_type == ast_step &&
10387 (_axis == axis_child || _axis == axis_self ||
10388 _axis == axis_descendant || _axis == axis_descendant_or_self) &&
10389 _left && _left->_type == ast_step &&
10390 _left->_axis == axis_descendant_or_self &&
10391 _left->_test == nodetest_type_node && !_left->_right &&
10392 is_posinv_step()) {
10393 if (_axis == axis_child || _axis == axis_descendant)
10394 _axis = axis_descendant;
10395 else
10396 _axis = axis_descendant_or_self;
10397
10398 _left = _left->_left;
10399 }
10400
10401 // Use optimized lookup table implementation for translate() with constant
10402 // arguments
10403 if (_type == ast_func_translate && _right->_type == ast_string_constant &&
10404 _right->_next->_type == ast_string_constant) {
10405 unsigned char* table = translate_table_generate(
10406 alloc, _right->_data.string, _right->_next->_data.string);
10407
10408 if (table) {
10409 _type = ast_opt_translate_table;
10410 _data.table = table;
10411 }
10412 }
10413
10414 // Use optimized path for @attr = 'value' or @attr = $value
10415 if (_type == ast_op_equal && _left->_type == ast_step &&
10416 _left->_axis == axis_attribute && _left->_test == nodetest_name &&
10417 !_left->_left && !_left->_right &&
10418 (_right->_type == ast_string_constant ||
10419 (_right->_type == ast_variable &&
10420 _right->_rettype == xpath_type_string))) {
10421 _type = ast_opt_compare_attribute;
10422 }
10423 }
10424
10425 bool is_posinv_expr() const {
10426 switch (_type) {
10427 case ast_func_position:
10428 case ast_func_last:
10429 return false;
10430
10431 case ast_string_constant:
10432 case ast_number_constant:
10433 case ast_variable:
10434 return true;
10435
10436 case ast_step:
10437 case ast_step_root:
10438 return true;
10439
10440 case ast_predicate:
10441 case ast_filter:
10442 return true;
10443
10444 default:
10445 if (_left && !_left->is_posinv_expr()) return false;
10446
10447 for (xpath_ast_node* n = _right; n; n = n->_next)
10448 if (!n->is_posinv_expr()) return false;
10449
10450 return true;
10451 }
10452 }
10453
10454 bool is_posinv_step() const {
10455 assert(_type == ast_step);
10456
10457 for (xpath_ast_node* n = _right; n; n = n->_next) {
10458 assert(n->_type == ast_predicate);
10459
10460 if (n->_test != predicate_posinv) return false;
10461 }
10462
10463 return true;
10464 }
10465
10466 xpath_value_type rettype() const {
10467 return static_cast<xpath_value_type>(_rettype);
10468 }
10469};
10470
// Allocator used for AST nodes and for copies of lexer strings
// (see alloc_node() and alloc_string()).
10472 xpath_allocator* _alloc;
// Lexer that supplies the current lexeme and its contents to the parse_*
// methods.
10473 xpath_lexer _lexer;
10474
// Start of the query text; throw_error() subtracts it from the lexer position
// to report the error offset.
10475 const char_t* _query;
// Variable set used to resolve variable references; may be null, in which
// case any variable reference is reported as an error.
10476 xpath_variable_set* _variables;
10477
// Receives the error message and offset written by throw_error().
10478 xpath_parse_result* _result;
10479
// Scratch buffer handed to get_variable_scratch() and
// convert_string_to_number_scratch().
10480 char_t _scratch[32];
10481
// Jump target for throw_error() when exceptions are disabled.
10482#ifdef PUGIXML_NO_EXCEPTIONS
10483 jmp_buf _error_handler;
10484#endif
10485
10486 void throw_error(const char* message) {
10487 _result->error = message;
10488 _result->offset = _lexer.current_pos() - _query;
10489
10490#ifdef PUGIXML_NO_EXCEPTIONS
10491 longjmp(_error_handler, 1);
10492#else
10493 throw xpath_exception(*_result);
10494#endif
10495 }
10496
// Reports an allocation failure; this function does not return.
// With exceptions enabled it throws std::bad_alloc; otherwise it goes through
// throw_error() with the message "Out of memory".
void throw_error_oom() {
#ifndef PUGIXML_NO_EXCEPTIONS
  throw std::bad_alloc();
#else
  throw_error("Out of memory");
#endif
}
10504
10505 void* alloc_node() {
10506 void* result = _alloc->allocate_nothrow(sizeof(xpath_ast_node));
10507
10508 if (!result) throw_error_oom();
10509
10510 return result;
10511 }
10512
10513 const char_t* alloc_string(const xpath_lexer_string& value) {
10514 if (value.begin) {
10515 size_t length = static_cast<size_t>(value.end - value.begin);
10516
10517 char_t* c = static_cast<char_t*>(
10518 _alloc->allocate_nothrow((length + 1) * sizeof(char_t)));
10519 if (!c) throw_error_oom();
10520 assert(c); // workaround for clang static analysis
10521
10522 memcpy(c, value.begin, length * sizeof(char_t));
10523 c[length] = 0;
10524
10525 return c;
10526 } else
10527 return 0;
10528 }
10529
10530 xpath_ast_node* parse_function_helper(ast_type_t type0, ast_type_t type1,
10531 size_t argc, xpath_ast_node* args[2]) {
10532 assert(argc <= 1);
10533
10534 if (argc == 1 && args[0]->rettype() != xpath_type_node_set)
10535 throw_error("Function has to be applied to node set");
10536
10537 return new (alloc_node())
10538 xpath_ast_node(argc == 0 ? type0 : type1, xpath_type_string, args[0]);
10539 }
10540
10541 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc,
10542 xpath_ast_node* args[2]) {
10543 switch (name.begin[0]) {
10544 case 'b':
10545 if (name == PUGIXML_TEXT("boolean") && argc == 1)
10546 return new (alloc_node())
10547 xpath_ast_node(ast_func_boolean, xpath_type_boolean, args[0]);
10548
10549 break;
10550
10551 case 'c':
10552 if (name == PUGIXML_TEXT("count") && argc == 1) {
10553 if (args[0]->rettype() != xpath_type_node_set)
10554 throw_error("Function has to be applied to node set");
10555
10556 return new (alloc_node())
10557 xpath_ast_node(ast_func_count, xpath_type_number, args[0]);
10558 } else if (name == PUGIXML_TEXT("contains") && argc == 2)
10559 return new (alloc_node()) xpath_ast_node(
10560 ast_func_contains, xpath_type_boolean, args[0], args[1]);
10561 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
10562 return new (alloc_node()) xpath_ast_node(
10563 ast_func_concat, xpath_type_string, args[0], args[1]);
10564 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
10565 return new (alloc_node())
10566 xpath_ast_node(ast_func_ceiling, xpath_type_number, args[0]);
10567
10568 break;
10569
10570 case 'f':
10571 if (name == PUGIXML_TEXT("false") && argc == 0)
10572 return new (alloc_node())
10573 xpath_ast_node(ast_func_false, xpath_type_boolean);
10574 else if (name == PUGIXML_TEXT("floor") && argc == 1)
10575 return new (alloc_node())
10576 xpath_ast_node(ast_func_floor, xpath_type_number, args[0]);
10577
10578 break;
10579
10580 case 'i':
10581 if (name == PUGIXML_TEXT("id") && argc == 1)
10582 return new (alloc_node())
10583 xpath_ast_node(ast_func_id, xpath_type_node_set, args[0]);
10584
10585 break;
10586
10587 case 'l':
10588 if (name == PUGIXML_TEXT("last") && argc == 0)
10589 return new (alloc_node())
10590 xpath_ast_node(ast_func_last, xpath_type_number);
10591 else if (name == PUGIXML_TEXT("lang") && argc == 1)
10592 return new (alloc_node())
10593 xpath_ast_node(ast_func_lang, xpath_type_boolean, args[0]);
10594 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
10595 return parse_function_helper(ast_func_local_name_0,
10596 ast_func_local_name_1, argc, args);
10597
10598 break;
10599
10600 case 'n':
10601 if (name == PUGIXML_TEXT("name") && argc <= 1)
10602 return parse_function_helper(ast_func_name_0, ast_func_name_1, argc,
10603 args);
10604 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
10605 return parse_function_helper(ast_func_namespace_uri_0,
10606 ast_func_namespace_uri_1, argc, args);
10607 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
10608 return new (alloc_node())
10609 xpath_ast_node(argc == 0 ? ast_func_normalize_space_0
10610 : ast_func_normalize_space_1,
10611 xpath_type_string, args[0], args[1]);
10612 else if (name == PUGIXML_TEXT("not") && argc == 1)
10613 return new (alloc_node())
10614 xpath_ast_node(ast_func_not, xpath_type_boolean, args[0]);
10615 else if (name == PUGIXML_TEXT("number") && argc <= 1)
10616 return new (alloc_node())
10617 xpath_ast_node(argc == 0 ? ast_func_number_0 : ast_func_number_1,
10618 xpath_type_number, args[0]);
10619
10620 break;
10621
10622 case 'p':
10623 if (name == PUGIXML_TEXT("position") && argc == 0)
10624 return new (alloc_node())
10625 xpath_ast_node(ast_func_position, xpath_type_number);
10626
10627 break;
10628
10629 case 'r':
10630 if (name == PUGIXML_TEXT("round") && argc == 1)
10631 return new (alloc_node())
10632 xpath_ast_node(ast_func_round, xpath_type_number, args[0]);
10633
10634 break;
10635
10636 case 's':
10637 if (name == PUGIXML_TEXT("string") && argc <= 1)
10638 return new (alloc_node())
10639 xpath_ast_node(argc == 0 ? ast_func_string_0 : ast_func_string_1,
10640 xpath_type_string, args[0]);
10641 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
10642 return new (alloc_node()) xpath_ast_node(
10643 argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1,
10644 xpath_type_number, args[0]);
10645 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
10646 return new (alloc_node()) xpath_ast_node(
10647 ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
10648 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
10649 return new (alloc_node()) xpath_ast_node(
10650 ast_func_substring_before, xpath_type_string, args[0], args[1]);
10651 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
10652 return new (alloc_node()) xpath_ast_node(
10653 ast_func_substring_after, xpath_type_string, args[0], args[1]);
10654 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
10655 return new (alloc_node()) xpath_ast_node(
10656 argc == 2 ? ast_func_substring_2 : ast_func_substring_3,
10657 xpath_type_string, args[0], args[1]);
10658 else if (name == PUGIXML_TEXT("sum") && argc == 1) {
10659 if (args[0]->rettype() != xpath_type_node_set)
10660 throw_error("Function has to be applied to node set");
10661 return new (alloc_node())
10662 xpath_ast_node(ast_func_sum, xpath_type_number, args[0]);
10663 }
10664
10665 break;
10666
10667 case 't':
10668 if (name == PUGIXML_TEXT("translate") && argc == 3)
10669 return new (alloc_node()) xpath_ast_node(
10670 ast_func_translate, xpath_type_string, args[0], args[1]);
10671 else if (name == PUGIXML_TEXT("true") && argc == 0)
10672 return new (alloc_node())
10673 xpath_ast_node(ast_func_true, xpath_type_boolean);
10674
10675 break;
10676
10677 default:
10678 break;
10679 }
10680
10681 throw_error("Unrecognized function or wrong parameter count");
10682
10683 return 0;
10684 }
10685
10686 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) {
10687 specified = true;
10688
10689 switch (name.begin[0]) {
10690 case 'a':
10691 if (name == PUGIXML_TEXT("ancestor"))
10692 return axis_ancestor;
10693 else if (name == PUGIXML_TEXT("ancestor-or-self"))
10694 return axis_ancestor_or_self;
10695 else if (name == PUGIXML_TEXT("attribute"))
10696 return axis_attribute;
10697
10698 break;
10699
10700 case 'c':
10701 if (name == PUGIXML_TEXT("child")) return axis_child;
10702
10703 break;
10704
10705 case 'd':
10706 if (name == PUGIXML_TEXT("descendant"))
10707 return axis_descendant;
10708 else if (name == PUGIXML_TEXT("descendant-or-self"))
10709 return axis_descendant_or_self;
10710
10711 break;
10712
10713 case 'f':
10714 if (name == PUGIXML_TEXT("following"))
10715 return axis_following;
10716 else if (name == PUGIXML_TEXT("following-sibling"))
10717 return axis_following_sibling;
10718
10719 break;
10720
10721 case 'n':
10722 if (name == PUGIXML_TEXT("namespace")) return axis_namespace;
10723
10724 break;
10725
10726 case 'p':
10727 if (name == PUGIXML_TEXT("parent"))
10728 return axis_parent;
10729 else if (name == PUGIXML_TEXT("preceding"))
10730 return axis_preceding;
10731 else if (name == PUGIXML_TEXT("preceding-sibling"))
10732 return axis_preceding_sibling;
10733
10734 break;
10735
10736 case 's':
10737 if (name == PUGIXML_TEXT("self")) return axis_self;
10738
10739 break;
10740
10741 default:
10742 break;
10743 }
10744
10745 specified = false;
10746 return axis_child;
10747 }
10748
10749 nodetest_t parse_node_test_type(const xpath_lexer_string& name) {
10750 switch (name.begin[0]) {
10751 case 'c':
10752 if (name == PUGIXML_TEXT("comment")) return nodetest_type_comment;
10753
10754 break;
10755
10756 case 'n':
10757 if (name == PUGIXML_TEXT("node")) return nodetest_type_node;
10758
10759 break;
10760
10761 case 'p':
10762 if (name == PUGIXML_TEXT("processing-instruction"))
10763 return nodetest_type_pi;
10764
10765 break;
10766
10767 case 't':
10768 if (name == PUGIXML_TEXT("text")) return nodetest_type_text;
10769
10770 break;
10771
10772 default:
10773 break;
10774 }
10775
10776 return nodetest_none;
10777 }
10778
10779 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number |
10780 // FunctionCall
10781 xpath_ast_node* parse_primary_expression() {
10782 switch (_lexer.current()) {
10783 case lex_var_ref: {
10784 xpath_lexer_string name = _lexer.contents();
10785
10786 if (!_variables)
10787 throw_error("Unknown variable: variable set is not provided");
10788
10789 xpath_variable* var = 0;
10790 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end,
10791 &var))
10792 throw_error_oom();
10793
10794 if (!var)
10795 throw_error(
10796 "Unknown variable: variable set does not contain the given name");
10797
10798 _lexer.next();
10799
10800 return new (alloc_node())
10801 xpath_ast_node(ast_variable, var->type(), var);
10802 }
10803
10804 case lex_open_brace: {
10805 _lexer.next();
10806
10807 xpath_ast_node* n = parse_expression();
10808
10809 if (_lexer.current() != lex_close_brace)
10810 throw_error("Unmatched braces");
10811
10812 _lexer.next();
10813
10814 return n;
10815 }
10816
10817 case lex_quoted_string: {
10818 const char_t* value = alloc_string(_lexer.contents());
10819
10820 xpath_ast_node* n = new (alloc_node())
10821 xpath_ast_node(ast_string_constant, xpath_type_string, value);
10822 _lexer.next();
10823
10824 return n;
10825 }
10826
10827 case lex_number: {
10828 double value = 0;
10829
10830 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin,
10831 _lexer.contents().end, &value))
10832 throw_error_oom();
10833
10834 xpath_ast_node* n = new (alloc_node())
10835 xpath_ast_node(ast_number_constant, xpath_type_number, value);
10836 _lexer.next();
10837
10838 return n;
10839 }
10840
10841 case lex_string: {
10842 xpath_ast_node* args[2] = {0};
10843 size_t argc = 0;
10844
10845 xpath_lexer_string function = _lexer.contents();
10846 _lexer.next();
10847
10848 xpath_ast_node* last_arg = 0;
10849
10850 if (_lexer.current() != lex_open_brace)
10851 throw_error("Unrecognized function call");
10852 _lexer.next();
10853
10854 if (_lexer.current() != lex_close_brace)
10855 args[argc++] = parse_expression();
10856
10857 while (_lexer.current() != lex_close_brace) {
10858 if (_lexer.current() != lex_comma)
10859 throw_error("No comma between function arguments");
10860 _lexer.next();
10861
10862 xpath_ast_node* n = parse_expression();
10863
10864 if (argc < 2)
10865 args[argc] = n;
10866 else
10867 last_arg->set_next(n);
10868
10869 argc++;
10870 last_arg = n;
10871 }
10872
10873 _lexer.next();
10874
10875 return parse_function(function, argc, args);
10876 }
10877
10878 default:
10879 throw_error("Unrecognizable primary expression");
10880
10881 return 0;
10882 }
10883 }
10884
10885 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
10886 // Predicate ::= '[' PredicateExpr ']'
10887 // PredicateExpr ::= Expr
10888 xpath_ast_node* parse_filter_expression() {
10889 xpath_ast_node* n = parse_primary_expression();
10890
10891 while (_lexer.current() == lex_open_square_brace) {
10892 _lexer.next();
10893
10894 xpath_ast_node* expr = parse_expression();
10895
10896 if (n->rettype() != xpath_type_node_set)
10897 throw_error("Predicate has to be applied to node set");
10898
10899 n = new (alloc_node())
10900 xpath_ast_node(ast_filter, n, expr, predicate_default);
10901
10902 if (_lexer.current() != lex_close_square_brace)
10903 throw_error("Unmatched square brace");
10904
10905 _lexer.next();
10906 }
10907
10908 return n;
10909 }
10910
10911 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
10912 // AxisSpecifier ::= AxisName '::' | '@'?
10913 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '('
10914 // Literal ')' NameTest ::= '*' | NCName ':' '*' | QName AbbreviatedStep ::=
10915 // '.' | '..'
10916 xpath_ast_node* parse_step(xpath_ast_node* set) {
10917 if (set && set->rettype() != xpath_type_node_set)
10918 throw_error("Step has to be applied to node set");
10919
10920 bool axis_specified = false;
10921 axis_t axis = axis_child; // implied child axis
10922
10923 if (_lexer.current() == lex_axis_attribute) {
10924 axis = axis_attribute;
10925 axis_specified = true;
10926
10927 _lexer.next();
10928 } else if (_lexer.current() == lex_dot) {
10929 _lexer.next();
10930
10931 return new (alloc_node())
10932 xpath_ast_node(ast_step, set, axis_self, nodetest_type_node, 0);
10933 } else if (_lexer.current() == lex_double_dot) {
10934 _lexer.next();
10935
10936 return new (alloc_node())
10937 xpath_ast_node(ast_step, set, axis_parent, nodetest_type_node, 0);
10938 }
10939
10940 nodetest_t nt_type = nodetest_none;
10941 xpath_lexer_string nt_name;
10942
10943 if (_lexer.current() == lex_string) {
10944 // node name test
10945 nt_name = _lexer.contents();
10946 _lexer.next();
10947
10948 // was it an axis name?
10949 if (_lexer.current() == lex_double_colon) {
10950 // parse axis name
10951 if (axis_specified) throw_error("Two axis specifiers in one step");
10952
10953 axis = parse_axis_name(nt_name, axis_specified);
10954
10955 if (!axis_specified) throw_error("Unknown axis");
10956
10957 // read actual node test
10958 _lexer.next();
10959
10960 if (_lexer.current() == lex_multiply) {
10961 nt_type = nodetest_all;
10962 nt_name = xpath_lexer_string();
10963 _lexer.next();
10964 } else if (_lexer.current() == lex_string) {
10965 nt_name = _lexer.contents();
10966 _lexer.next();
10967 } else
10968 throw_error("Unrecognized node test");
10969 }
10970
10971 if (nt_type == nodetest_none) {
10972 // node type test or processing-instruction
10973 if (_lexer.current() == lex_open_brace) {
10974 _lexer.next();
10975
10976 if (_lexer.current() == lex_close_brace) {
10977 _lexer.next();
10978
10979 nt_type = parse_node_test_type(nt_name);
10980
10981 if (nt_type == nodetest_none) throw_error("Unrecognized node type");
10982
10983 nt_name = xpath_lexer_string();
10984 } else if (nt_name == PUGIXML_TEXT("processing-instruction")) {
10985 if (_lexer.current() != lex_quoted_string)
10986 throw_error(
10987 "Only literals are allowed as arguments to "
10988 "processing-instruction()");
10989
10990 nt_type = nodetest_pi;
10991 nt_name = _lexer.contents();
10992 _lexer.next();
10993
10994 if (_lexer.current() != lex_close_brace)
10995 throw_error("Unmatched brace near processing-instruction()");
10996 _lexer.next();
10997 } else {
10998 throw_error("Unmatched brace near node type test");
10999 }
11000 }
11001 // QName or NCName:*
11002 else {
11003 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' &&
11004 nt_name.end[-1] == '*') // NCName:*
11005 {
11006 nt_name.end--; // erase *
11007
11008 nt_type = nodetest_all_in_namespace;
11009 } else {
11010 nt_type = nodetest_name;
11011 }
11012 }
11013 }
11014 } else if (_lexer.current() == lex_multiply) {
11015 nt_type = nodetest_all;
11016 _lexer.next();
11017 } else {
11018 throw_error("Unrecognized node test");
11019 }
11020
11021 const char_t* nt_name_copy = alloc_string(nt_name);
11022 xpath_ast_node* n = new (alloc_node())
11023 xpath_ast_node(ast_step, set, axis, nt_type, nt_name_copy);
11024
11025 xpath_ast_node* last = 0;
11026
11027 while (_lexer.current() == lex_open_square_brace) {
11028 _lexer.next();
11029
11030 xpath_ast_node* expr = parse_expression();
11031
11032 xpath_ast_node* pred = new (alloc_node())
11033 xpath_ast_node(ast_predicate, 0, expr, predicate_default);
11034
11035 if (_lexer.current() != lex_close_square_brace)
11036 throw_error("Unmatched square brace");
11037 _lexer.next();
11038
11039 if (last)
11040 last->set_next(pred);
11041 else
11042 n->set_right(pred);
11043
11044 last = pred;
11045 }
11046
11047 return n;
11048 }
11049
11050 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step |
11051 // RelativeLocationPath '//' Step
11052 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) {
11053 xpath_ast_node* n = parse_step(set);
11054
11055 while (_lexer.current() == lex_slash ||
11056 _lexer.current() == lex_double_slash) {
11057 lexeme_t l = _lexer.current();
11058 _lexer.next();
11059
11060 if (l == lex_double_slash)
11061 n = new (alloc_node()) xpath_ast_node(
11062 ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11063
11064 n = parse_step(n);
11065 }
11066
11067 return n;
11068 }
11069
11070 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11071 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//'
11072 // RelativeLocationPath
11073 xpath_ast_node* parse_location_path() {
11074 if (_lexer.current() == lex_slash) {
11075 _lexer.next();
11076
11077 xpath_ast_node* n =
11078 new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11079
11080 // relative location path can start from axis_attribute, dot, double_dot,
11081 // multiply and string lexemes; any other lexeme means standalone root
11082 // path
11083 lexeme_t l = _lexer.current();
11084
11085 if (l == lex_string || l == lex_axis_attribute || l == lex_dot ||
11086 l == lex_double_dot || l == lex_multiply)
11087 return parse_relative_location_path(n);
11088 else
11089 return n;
11090 } else if (_lexer.current() == lex_double_slash) {
11091 _lexer.next();
11092
11093 xpath_ast_node* n =
11094 new (alloc_node()) xpath_ast_node(ast_step_root, xpath_type_node_set);
11095 n = new (alloc_node()) xpath_ast_node(
11096 ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11097
11098 return parse_relative_location_path(n);
11099 }
11100
11101 // else clause moved outside of if because of bogus warning 'control may
11102 // reach end of non-void function being inlined' in gcc 4.0.1
11103 return parse_relative_location_path(0);
11104 }
11105
11106 // PathExpr ::= LocationPath
11107 // | FilterExpr
11108 // | FilterExpr '/' RelativeLocationPath
11109 // | FilterExpr '//' RelativeLocationPath
11110 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
11111 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
11112 xpath_ast_node* parse_path_or_unary_expression() {
11113 // Clarification.
11114 // PathExpr begins with either LocationPath or FilterExpr.
11115 // FilterExpr begins with PrimaryExpr
11116 // PrimaryExpr begins with '$' in case of it being a variable reference,
11117 // '(' in case of it being an expression, string literal, number constant or
11118 // function call.
11119
11120 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
11121 _lexer.current() == lex_quoted_string ||
11122 _lexer.current() == lex_number || _lexer.current() == lex_string) {
11123 if (_lexer.current() == lex_string) {
11124 // This is either a function call, or not - if not, we shall proceed
11125 // with location path
11126 const char_t* state = _lexer.state();
11127
11128 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
11129
11130 if (*state != '(') return parse_location_path();
11131
11132 // This looks like a function call; however this still can be a
11133 // node-test. Check it.
11134 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
11135 return parse_location_path();
11136 }
11137
11138 xpath_ast_node* n = parse_filter_expression();
11139
11140 if (_lexer.current() == lex_slash ||
11141 _lexer.current() == lex_double_slash) {
11142 lexeme_t l = _lexer.current();
11143 _lexer.next();
11144
11145 if (l == lex_double_slash) {
11146 if (n->rettype() != xpath_type_node_set)
11147 throw_error("Step has to be applied to node set");
11148
11149 n = new (alloc_node()) xpath_ast_node(
11150 ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11151 }
11152
11153 // select from location path
11154 return parse_relative_location_path(n);
11155 }
11156
11157 return n;
11158 } else if (_lexer.current() == lex_minus) {
11159 _lexer.next();
11160
11161 // precedence 7+ - only parses union expressions
11162 xpath_ast_node* expr =
11163 parse_expression_rec(parse_path_or_unary_expression(), 7);
11164
11165 return new (alloc_node())
11166 xpath_ast_node(ast_op_negate, xpath_type_number, expr);
11167 } else {
11168 return parse_location_path();
11169 }
11170 }
11171
11173 ast_type_t asttype;
11174 xpath_value_type rettype;
11175 int precedence;
11176
11177 binary_op_t()
11178 : asttype(ast_unknown), rettype(xpath_type_none), precedence(0) {}
11179
11180 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_)
11181 : asttype(asttype_), rettype(rettype_), precedence(precedence_) {}
11182
11183 static binary_op_t parse(xpath_lexer& lexer) {
11184 switch (lexer.current()) {
11185 case lex_string:
11186 if (lexer.contents() == PUGIXML_TEXT("or"))
11187 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
11188 else if (lexer.contents() == PUGIXML_TEXT("and"))
11189 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
11190 else if (lexer.contents() == PUGIXML_TEXT("div"))
11191 return binary_op_t(ast_op_divide, xpath_type_number, 6);
11192 else if (lexer.contents() == PUGIXML_TEXT("mod"))
11193 return binary_op_t(ast_op_mod, xpath_type_number, 6);
11194 else
11195 return binary_op_t();
11196
11197 case lex_equal:
11198 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
11199
11200 case lex_not_equal:
11201 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
11202
11203 case lex_less:
11204 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
11205
11206 case lex_greater:
11207 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
11208
11209 case lex_less_or_equal:
11210 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
11211
11212 case lex_greater_or_equal:
11213 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
11214
11215 case lex_plus:
11216 return binary_op_t(ast_op_add, xpath_type_number, 5);
11217
11218 case lex_minus:
11219 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
11220
11221 case lex_multiply:
11222 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
11223
11224 case lex_union:
11225 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
11226
11227 default:
11228 return binary_op_t();
11229 }
11230 }
11231 };
11232
11233 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) {
11234 binary_op_t op = binary_op_t::parse(_lexer);
11235
11236 while (op.asttype != ast_unknown && op.precedence >= limit) {
11237 _lexer.next();
11238
11239 xpath_ast_node* rhs = parse_path_or_unary_expression();
11240
11241 binary_op_t nextop = binary_op_t::parse(_lexer);
11242
11243 while (nextop.asttype != ast_unknown &&
11244 nextop.precedence > op.precedence) {
11245 rhs = parse_expression_rec(rhs, nextop.precedence);
11246
11247 nextop = binary_op_t::parse(_lexer);
11248 }
11249
11250 if (op.asttype == ast_op_union &&
11251 (lhs->rettype() != xpath_type_node_set ||
11252 rhs->rettype() != xpath_type_node_set))
11253 throw_error("Union operator has to be applied to node sets");
11254
11255 lhs = new (alloc_node()) xpath_ast_node(op.asttype, op.rettype, lhs, rhs);
11256
11257 op = binary_op_t::parse(_lexer);
11258 }
11259
11260 return lhs;
11261 }
11262
11263 // Expr ::= OrExpr
11264 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
11265 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
11266 // EqualityExpr ::= RelationalExpr
11267 // | EqualityExpr '=' RelationalExpr
11268 // | EqualityExpr '!=' RelationalExpr
11269 // RelationalExpr ::= AdditiveExpr
11270 // | RelationalExpr '<' AdditiveExpr
11271 // | RelationalExpr '>' AdditiveExpr
11272 // | RelationalExpr '<=' AdditiveExpr
11273 // | RelationalExpr '>=' AdditiveExpr
11274 // AdditiveExpr ::= MultiplicativeExpr
11275 // | AdditiveExpr '+' MultiplicativeExpr
11276 // | AdditiveExpr '-' MultiplicativeExpr
11277 // MultiplicativeExpr ::= UnaryExpr
11278 // | MultiplicativeExpr '*' UnaryExpr
11279 // | MultiplicativeExpr 'div' UnaryExpr
11280 // | MultiplicativeExpr 'mod' UnaryExpr
11281 xpath_ast_node* parse_expression() {
11282 return parse_expression_rec(parse_path_or_unary_expression(), 0);
11283 }
11284
11285 xpath_parser(const char_t* query, xpath_variable_set* variables,
11286 xpath_allocator* alloc, xpath_parse_result* result)
11287 : _alloc(alloc),
11288 _lexer(query),
11289 _query(query),
11290 _variables(variables),
11291 _result(result) {}
11292
11293 xpath_ast_node* parse() {
11294 xpath_ast_node* result = parse_expression();
11295
11296 // check if there are unparsed tokens left
11297 if (_lexer.current() != lex_eof) throw_error("Incorrect query");
11298
11299 return result;
11300 }
11301
11302 static xpath_ast_node* parse(const char_t* query,
11303 xpath_variable_set* variables,
11304 xpath_allocator* alloc,
11305 xpath_parse_result* result) {
11306 xpath_parser parser(query, variables, alloc, result);
11307
11308#ifdef PUGIXML_NO_EXCEPTIONS
11309 int error = setjmp(parser._error_handler);
11310
11311 return (error == 0) ? parser.parse() : 0;
11312#else
11313 return parser.parse();
11314#endif
11315 }
11316};
11317
11319 static xpath_query_impl* create() {
11320 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
11321 if (!memory) return 0;
11322
11323 return new (memory) xpath_query_impl();
11324 }
11325
11326 static void destroy(xpath_query_impl* impl) {
11327 // free all allocated pages
11328 impl->alloc.release();
11329
11330 // free allocator memory (with the first page)
11331 xml_memory::deallocate(impl);
11332 }
11333
11334 xpath_query_impl() : root(0), alloc(&block) {
11335 block.next = 0;
11336 block.capacity = sizeof(block.data);
11337 }
11338
11339 xpath_ast_node* root;
11340 xpath_allocator alloc;
11342};
11343
11344PUGI__FN xpath_string evaluate_string_impl(xpath_query_impl* impl,
11345 const xpath_node& n,
11346 xpath_stack_data& sd) {
11347 if (!impl) return xpath_string();
11348
11349#ifdef PUGIXML_NO_EXCEPTIONS
11350 if (setjmp(sd.error_handler)) return xpath_string();
11351#endif
11352
11353 xpath_context c(n, 1, 1);
11354
11355 return impl->root->eval_string(c, sd.stack);
11356}
11357
11358PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(
11359 xpath_query_impl* impl) {
11360 if (!impl) return 0;
11361
11362 if (impl->root->rettype() != xpath_type_node_set) {
11363#ifdef PUGIXML_NO_EXCEPTIONS
11364 return 0;
11365#else
11366 xpath_parse_result res;
11367 res.error = "Expression does not evaluate to node set";
11368
11369 throw xpath_exception(res);
11370#endif
11371 }
11372
11373 return impl->root;
11374}
11375PUGI__NS_END
11376
11377namespace pugi {
11378#ifndef PUGIXML_NO_EXCEPTIONS
11379PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_)
11380 : _result(result_) {
11381 assert(_result.error);
11382}
11383
11384PUGI__FN const char* xpath_exception::what() const throw() {
11385 return _result.error;
11386}
11387
11388PUGI__FN const xpath_parse_result& xpath_exception::result() const {
11389 return _result;
11390}
11391#endif
11392
11393PUGI__FN xpath_node::xpath_node() {}
11394
11395PUGI__FN xpath_node::xpath_node(const xml_node& node_) : _node(node_) {}
11396
11397PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_,
11398 const xml_node& parent_)
11399 : _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) {}
11400
11401PUGI__FN xml_node xpath_node::node() const {
11402 return _attribute ? xml_node() : _node;
11403}
11404
11405PUGI__FN xml_attribute xpath_node::attribute() const { return _attribute; }
11406
11407PUGI__FN xml_node xpath_node::parent() const {
11408 return _attribute ? _node : _node.parent();
11409}
11410
11411PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) {}
11412
11413PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const {
11414 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
11415}
11416
11417PUGI__FN bool xpath_node::operator!() const { return !(_node || _attribute); }
11418
11419PUGI__FN bool xpath_node::operator==(const xpath_node& n) const {
11420 return _node == n._node && _attribute == n._attribute;
11421}
11422
11423PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const {
11424 return _node != n._node || _attribute != n._attribute;
11425}
11426
11427#ifdef __BORLANDC__
11428PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) {
11429 return (bool)lhs && rhs;
11430}
11431
11432PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) {
11433 return (bool)lhs || rhs;
11434}
11435#endif
11436
11437PUGI__FN void xpath_node_set::_assign(const_iterator begin_,
11438 const_iterator end_, type_t type_) {
11439 assert(begin_ <= end_);
11440
11441 size_t size_ = static_cast<size_t>(end_ - begin_);
11442
11443 if (size_ <= 1) {
11444 // deallocate old buffer
11445 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11446
11447 // use internal buffer
11448 if (begin_ != end_) _storage = *begin_;
11449
11450 _begin = &_storage;
11451 _end = &_storage + size_;
11452 _type = type_;
11453 } else {
11454 // make heap copy
11455 xpath_node* storage = static_cast<xpath_node*>(
11456 impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
11457
11458 if (!storage) {
11459#ifdef PUGIXML_NO_EXCEPTIONS
11460 return;
11461#else
11462 throw std::bad_alloc();
11463#endif
11464 }
11465
11466 memcpy(storage, begin_, size_ * sizeof(xpath_node));
11467
11468 // deallocate old buffer
11469 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11470
11471 // finalize
11472 _begin = storage;
11473 _end = storage + size_;
11474 _type = type_;
11475 }
11476}
11477
11478#ifdef PUGIXML_HAS_MOVE
11479PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) {
11480 _type = rhs._type;
11481 _storage = rhs._storage;
11482 _begin = (rhs._begin == &rhs._storage) ? &_storage : rhs._begin;
11483 _end = _begin + (rhs._end - rhs._begin);
11484
11485 rhs._type = type_unsorted;
11486 rhs._begin = &rhs._storage;
11487 rhs._end = rhs._begin;
11488}
11489#endif
11490
11491PUGI__FN xpath_node_set::xpath_node_set()
11492 : _type(type_unsorted), _begin(&_storage), _end(&_storage) {}
11493
11494PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_,
11495 const_iterator end_, type_t type_)
11496 : _type(type_unsorted), _begin(&_storage), _end(&_storage) {
11497 _assign(begin_, end_, type_);
11498}
11499
11500PUGI__FN xpath_node_set::~xpath_node_set() {
11501 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11502}
11503
11504PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns)
11505 : _type(type_unsorted), _begin(&_storage), _end(&_storage) {
11506 _assign(ns._begin, ns._end, ns._type);
11507}
11508
11509PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) {
11510 if (this == &ns) return *this;
11511
11512 _assign(ns._begin, ns._end, ns._type);
11513
11514 return *this;
11515}
11516
11517#ifdef PUGIXML_HAS_MOVE
11518PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs)
11519 : _type(type_unsorted), _begin(&_storage), _end(&_storage) {
11520 _move(rhs);
11521}
11522
11523PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) {
11524 if (this == &rhs) return *this;
11525
11526 if (_begin != &_storage) impl::xml_memory::deallocate(_begin);
11527
11528 _move(rhs);
11529
11530 return *this;
11531}
11532#endif
11533
11534PUGI__FN xpath_node_set::type_t xpath_node_set::type() const { return _type; }
11535
11536PUGI__FN size_t xpath_node_set::size() const { return _end - _begin; }
11537
11538PUGI__FN bool xpath_node_set::empty() const { return _begin == _end; }
11539
11540PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const {
11541 assert(index < size());
11542 return _begin[index];
11543}
11544
11545PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const {
11546 return _begin;
11547}
11548
11549PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const {
11550 return _end;
11551}
11552
11553PUGI__FN void xpath_node_set::sort(bool reverse) {
11554 _type = impl::xpath_sort(_begin, _end, _type, reverse);
11555}
11556
11557PUGI__FN xpath_node xpath_node_set::first() const {
11558 return impl::xpath_first(_begin, _end, _type);
11559}
11560
11561PUGI__FN xpath_parse_result::xpath_parse_result()
11562 : error("Internal error"), offset(0) {}
11563
11564PUGI__FN xpath_parse_result::operator bool() const { return error == 0; }
11565
11566PUGI__FN const char* xpath_parse_result::description() const {
11567 return error ? error : "No error";
11568}
11569
11570PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_)
11571 : _type(type_), _next(0) {}
11572
11573PUGI__FN const char_t* xpath_variable::name() const {
11574 switch (_type) {
11575 case xpath_type_node_set:
11576 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
11577
11578 case xpath_type_number:
11579 return static_cast<const impl::xpath_variable_number*>(this)->name;
11580
11581 case xpath_type_string:
11582 return static_cast<const impl::xpath_variable_string*>(this)->name;
11583
11584 case xpath_type_boolean:
11585 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
11586
11587 default:
11588 assert(false && "Invalid variable type");
11589 return 0;
11590 }
11591}
11592
11593PUGI__FN xpath_value_type xpath_variable::type() const { return _type; }
11594
11595PUGI__FN bool xpath_variable::get_boolean() const {
11596 return (_type == xpath_type_boolean)
11597 ? static_cast<const impl::xpath_variable_boolean*>(this)->value
11598 : false;
11599}
11600
11601PUGI__FN double xpath_variable::get_number() const {
11602 return (_type == xpath_type_number)
11603 ? static_cast<const impl::xpath_variable_number*>(this)->value
11604 : impl::gen_nan();
11605}
11606
11607PUGI__FN const char_t* xpath_variable::get_string() const {
11608 const char_t* value =
11609 (_type == xpath_type_string)
11610 ? static_cast<const impl::xpath_variable_string*>(this)->value
11611 : 0;
11612 return value ? value : PUGIXML_TEXT("");
11613}
11614
11615PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const {
11616 return (_type == xpath_type_node_set)
11617 ? static_cast<const impl::xpath_variable_node_set*>(this)->value
11618 : impl::dummy_node_set;
11619}
11620
11621PUGI__FN bool xpath_variable::set(bool value) {
11622 if (_type != xpath_type_boolean) return false;
11623
11624 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
11625 return true;
11626}
11627
11628PUGI__FN bool xpath_variable::set(double value) {
11629 if (_type != xpath_type_number) return false;
11630
11631 static_cast<impl::xpath_variable_number*>(this)->value = value;
11632 return true;
11633}
11634
11635PUGI__FN bool xpath_variable::set(const char_t* value) {
11636 if (_type != xpath_type_string) return false;
11637
11638 impl::xpath_variable_string* var =
11639 static_cast<impl::xpath_variable_string*>(this);
11640
11641 // duplicate string
11642 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
11643
11644 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
11645 if (!copy) return false;
11646
11647 memcpy(copy, value, size);
11648
11649 // replace old string
11650 if (var->value) impl::xml_memory::deallocate(var->value);
11651 var->value = copy;
11652
11653 return true;
11654}
11655
11656PUGI__FN bool xpath_variable::set(const xpath_node_set& value) {
11657 if (_type != xpath_type_node_set) return false;
11658
11659 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
11660 return true;
11661}
11662
11663PUGI__FN xpath_variable_set::xpath_variable_set() {
11664 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
11665}
11666
11667PUGI__FN xpath_variable_set::~xpath_variable_set() {
11668 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
11669 _destroy(_data[i]);
11670}
11671
11672PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) {
11673 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) _data[i] = 0;
11674
11675 _assign(rhs);
11676}
11677
11678PUGI__FN xpath_variable_set& xpath_variable_set::operator=(
11679 const xpath_variable_set& rhs) {
11680 if (this == &rhs) return *this;
11681
11682 _assign(rhs);
11683
11684 return *this;
11685}
11686
11687#ifdef PUGIXML_HAS_MOVE
11688PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) {
11689 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
11690 _data[i] = rhs._data[i];
11691 rhs._data[i] = 0;
11692 }
11693}
11694
11695PUGI__FN xpath_variable_set& xpath_variable_set::operator=(
11696 xpath_variable_set&& rhs) {
11697 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
11698 _destroy(_data[i]);
11699
11700 _data[i] = rhs._data[i];
11701 rhs._data[i] = 0;
11702 }
11703
11704 return *this;
11705}
11706#endif
11707
11708PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) {
11709 xpath_variable_set temp;
11710
11711 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
11712 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) return;
11713
11714 _swap(temp);
11715}
11716
11717PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) {
11718 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) {
11719 xpath_variable* chain = _data[i];
11720
11721 _data[i] = rhs._data[i];
11722 rhs._data[i] = chain;
11723 }
11724}
11725
11726PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const {
11727 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
11728 size_t hash = impl::hash_string(name) % hash_size;
11729
11730 // look for existing variable
11731 for (xpath_variable* var = _data[hash]; var; var = var->_next)
11732 if (impl::strequal(var->name(), name)) return var;
11733
11734 return 0;
11735}
11736
11737PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var,
11738 xpath_variable** out_result) {
11739 xpath_variable* last = 0;
11740
11741 while (var) {
11742 // allocate storage for new variable
11743 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
11744 if (!nvar) return false;
11745
11746 // link the variable to the result immediately to handle failures gracefully
11747 if (last)
11748 last->_next = nvar;
11749 else
11750 *out_result = nvar;
11751
11752 last = nvar;
11753
11754 // copy the value; this can fail due to out-of-memory conditions
11755 if (!impl::copy_xpath_variable(nvar, var)) return false;
11756
11757 var = var->_next;
11758 }
11759
11760 return true;
11761}
11762
11763PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) {
11764 while (var) {
11765 xpath_variable* next = var->_next;
11766
11767 impl::delete_xpath_variable(var->_type, var);
11768
11769 var = next;
11770 }
11771}
11772
11773PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name,
11774 xpath_value_type type) {
11775 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
11776 size_t hash = impl::hash_string(name) % hash_size;
11777
11778 // look for existing variable
11779 for (xpath_variable* var = _data[hash]; var; var = var->_next)
11780 if (impl::strequal(var->name(), name)) return var->type() == type ? var : 0;
11781
11782 // add new variable
11783 xpath_variable* result = impl::new_xpath_variable(type, name);
11784
11785 if (result) {
11786 result->_next = _data[hash];
11787
11788 _data[hash] = result;
11789 }
11790
11791 return result;
11792}
11793
11794PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) {
11795 xpath_variable* var = add(name, xpath_type_boolean);
11796 return var ? var->set(value) : false;
11797}
11798
11799PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) {
11800 xpath_variable* var = add(name, xpath_type_number);
11801 return var ? var->set(value) : false;
11802}
11803
11804PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) {
11805 xpath_variable* var = add(name, xpath_type_string);
11806 return var ? var->set(value) : false;
11807}
11808
11809PUGI__FN bool xpath_variable_set::set(const char_t* name,
11810 const xpath_node_set& value) {
11811 xpath_variable* var = add(name, xpath_type_node_set);
11812 return var ? var->set(value) : false;
11813}
11814
11815PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) {
11816 return _find(name);
11817}
11818
11819PUGI__FN const xpath_variable* xpath_variable_set::get(
11820 const char_t* name) const {
11821 return _find(name);
11822}
11823
11824PUGI__FN xpath_query::xpath_query(const char_t* query,
11825 xpath_variable_set* variables)
11826 : _impl(0) {
11827 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
11828
11829 if (!qimpl) {
11830#ifdef PUGIXML_NO_EXCEPTIONS
11831 _result.error = "Out of memory";
11832#else
11833 throw std::bad_alloc();
11834#endif
11835 } else {
11836 using impl::auto_deleter; // MSVC7 workaround
11838 impl::xpath_query_impl::destroy);
11839
11840 qimpl->root =
11841 impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
11842
11843 if (qimpl->root) {
11844 qimpl->root->optimize(&qimpl->alloc);
11845
11846 _impl = impl.release();
11847 _result.error = 0;
11848 }
11849 }
11850}
11851
11852PUGI__FN xpath_query::xpath_query() : _impl(0) {}
11853
11854PUGI__FN xpath_query::~xpath_query() {
11855 if (_impl)
11856 impl::xpath_query_impl::destroy(
11857 static_cast<impl::xpath_query_impl*>(_impl));
11858}
11859
11860#ifdef PUGIXML_HAS_MOVE
11861PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) {
11862 _impl = rhs._impl;
11863 _result = rhs._result;
11864 rhs._impl = 0;
11865 rhs._result = xpath_parse_result();
11866}
11867
11868PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) {
11869 if (this == &rhs) return *this;
11870
11871 if (_impl)
11872 impl::xpath_query_impl::destroy(
11873 static_cast<impl::xpath_query_impl*>(_impl));
11874
11875 _impl = rhs._impl;
11876 _result = rhs._result;
11877 rhs._impl = 0;
11878 rhs._result = xpath_parse_result();
11879
11880 return *this;
11881}
11882#endif
11883
11884PUGI__FN xpath_value_type xpath_query::return_type() const {
11885 if (!_impl) return xpath_type_none;
11886
11887 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
11888}
11889
11890PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const {
11891 if (!_impl) return false;
11892
11893 impl::xpath_context c(n, 1, 1);
11894 impl::xpath_stack_data sd;
11895
11896#ifdef PUGIXML_NO_EXCEPTIONS
11897 if (setjmp(sd.error_handler)) return false;
11898#endif
11899
11900 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(
11901 c, sd.stack);
11902}
11903
11904PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const {
11905 if (!_impl) return impl::gen_nan();
11906
11907 impl::xpath_context c(n, 1, 1);
11908 impl::xpath_stack_data sd;
11909
11910#ifdef PUGIXML_NO_EXCEPTIONS
11911 if (setjmp(sd.error_handler)) return impl::gen_nan();
11912#endif
11913
11914 return static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(
11915 c, sd.stack);
11916}
11917
11918#ifndef PUGIXML_NO_STL
11919PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const {
11920 impl::xpath_stack_data sd;
11921
11922 impl::xpath_string r = impl::evaluate_string_impl(
11923 static_cast<impl::xpath_query_impl*>(_impl), n, sd);
11924
11925 return string_t(r.c_str(), r.length());
11926}
11927#endif
11928
11929PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity,
11930 const xpath_node& n) const {
11931 impl::xpath_stack_data sd;
11932
11933 impl::xpath_string r = impl::evaluate_string_impl(
11934 static_cast<impl::xpath_query_impl*>(_impl), n, sd);
11935
11936 size_t full_size = r.length() + 1;
11937
11938 if (capacity > 0) {
11939 size_t size = (full_size < capacity) ? full_size : capacity;
11940 assert(size > 0);
11941
11942 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
11943 buffer[size - 1] = 0;
11944 }
11945
11946 return full_size;
11947}
11948
11949PUGI__FN xpath_node_set
11950xpath_query::evaluate_node_set(const xpath_node& n) const {
11951 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(
11952 static_cast<impl::xpath_query_impl*>(_impl));
11953 if (!root) return xpath_node_set();
11954
11955 impl::xpath_context c(n, 1, 1);
11956 impl::xpath_stack_data sd;
11957
11958#ifdef PUGIXML_NO_EXCEPTIONS
11959 if (setjmp(sd.error_handler)) return xpath_node_set();
11960#endif
11961
11962 impl::xpath_node_set_raw r =
11963 root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
11964
11965 return xpath_node_set(r.begin(), r.end(), r.type());
11966}
11967
11968PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const {
11969 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(
11970 static_cast<impl::xpath_query_impl*>(_impl));
11971 if (!root) return xpath_node();
11972
11973 impl::xpath_context c(n, 1, 1);
11974 impl::xpath_stack_data sd;
11975
11976#ifdef PUGIXML_NO_EXCEPTIONS
11977 if (setjmp(sd.error_handler)) return xpath_node();
11978#endif
11979
11980 impl::xpath_node_set_raw r =
11981 root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
11982
11983 return r.first();
11984}
11985
11986PUGI__FN const xpath_parse_result& xpath_query::result() const {
11987 return _result;
11988}
11989
11990PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) {}
11991
11992PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const {
11993 return _impl ? unspecified_bool_xpath_query : 0;
11994}
11995
11996PUGI__FN bool xpath_query::operator!() const { return !_impl; }
11997
11998PUGI__FN xpath_node xml_node::select_node(const char_t* query,
11999 xpath_variable_set* variables) const {
12000 xpath_query q(query, variables);
12001 return select_node(q);
12002}
12003
12004PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const {
12005 return query.evaluate_node(*this);
12006}
12007
12008PUGI__FN xpath_node_set xml_node::select_nodes(
12009 const char_t* query, xpath_variable_set* variables) const {
12010 xpath_query q(query, variables);
12011 return select_nodes(q);
12012}
12013
12014PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const {
12015 return query.evaluate_node_set(*this);
12016}
12017
12018PUGI__FN xpath_node xml_node::select_single_node(
12019 const char_t* query, xpath_variable_set* variables) const {
12020 xpath_query q(query, variables);
12021 return select_single_node(q);
12022}
12023
12024PUGI__FN xpath_node
12025xml_node::select_single_node(const xpath_query& query) const {
12026 return query.evaluate_node(*this);
12027}
12028} // namespace pugi
12029
12030#endif
12031
12032#ifdef __BORLANDC__
12033#pragma option pop
12034#endif
12035
12036// Intel C++ does not properly keep warning state for function templates,
12037// so popping warning state at the end of translation unit leads to warnings in
12038// the middle.
12039#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
12040#pragma warning(pop)
12041#endif
12042
12043// Undefine all local macros (makes sure we're not leaking macros in header-only
12044// mode)
12045#undef PUGI__NO_INLINE
12046#undef PUGI__UNLIKELY
12047#undef PUGI__STATIC_ASSERT
12048#undef PUGI__DMC_VOLATILE
12049#undef PUGI__MSVC_CRT_VERSION
12050#undef PUGI__NS_BEGIN
12051#undef PUGI__NS_END
12052#undef PUGI__FN
12053#undef PUGI__FN_NO_INLINE
12054#undef PUGI__GETHEADER_IMPL
12055#undef PUGI__GETPAGE_IMPL
12056#undef PUGI__GETPAGE
12057#undef PUGI__NODETYPE
12058#undef PUGI__IS_CHARTYPE_IMPL
12059#undef PUGI__IS_CHARTYPE
12060#undef PUGI__IS_CHARTYPEX
12061#undef PUGI__ENDSWITH
12062#undef PUGI__SKIPWS
12063#undef PUGI__OPTSET
12064#undef PUGI__PUSHNODE
12065#undef PUGI__POPNODE
12066#undef PUGI__SCANFOR
12067#undef PUGI__SCANWHILE
12068#undef PUGI__SCANWHILE_UNROLL
12069#undef PUGI__ENDSEG
12070#undef PUGI__THROW_ERROR
12071#undef PUGI__CHECK_ERROR
12072
12073#endif
12074
Definition cm93.h:177
Runtime representation of a plugin block.