-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTMLElement.cpp
155 lines (144 loc) · 4.12 KB
/
HTMLElement.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#include "HTMLElement.h"
#include <string>
#include <sstream>
#include <list>
#include <map>
#include <functional>
/**
 * Build an element from its tag name, attribute map and raw inner markup.
 *
 * When the inner markup contains no '<' it is kept verbatim as the text
 * content. Otherwise the text content is left empty and the markup is
 * handed to parseElements() to produce the child elements.
 */
HTMLElement::HTMLElement(std::string type, std::map<std::string, std::string> attributes, std::string content)
{
    this->type = type;
    this->attributes = attributes;

    const bool hasMarkup = content.find("<") != std::string::npos;
    if (!hasMarkup)
    {
        // Plain text: store it as-is, there are no children to build.
        this->content = content;
        return;
    }

    // Nested markup: the element carries children instead of text.
    this->content = "";
    this->children = HTMLElement::parseElements(content);
}
/**
 * Create HTML Element list from HTML document.
 *
 * Drops everything up to and including the first "<!...>" declaration
 * (presumably the doctype; the code only looks for "<!") and parses the
 * remainder with parseElements().
 *
 * The declaration is only stripped when it sits in the document prolog,
 * i.e. the only tags in front of it are "<?...?>" processing instructions.
 * A "<!" that appears after real markup (for example a comment in the
 * middle of a document without a doctype) is left alone so the elements
 * preceding it are not thrown away.
 *
 * @param doc Full document text.
 * @return Top-level elements found in the document.
 */
std::list<HTMLElement> HTMLElement::parseDocument(std::string doc)
{
    const std::string::size_type declOpen = doc.find("<!");
    if (declOpen != std::string::npos)
    {
        // Every '<' in front of the declaration must start a "<?" instruction;
        // anything else means real markup precedes it.
        bool inProlog = true;
        for (std::string::size_type lt = doc.find('<'); lt < declOpen; lt = doc.find('<', lt + 1))
        {
            // lt < declOpen guarantees lt + 1 is a valid index.
            if (doc[lt + 1] != '?')
            {
                inProlog = false;
                break;
            }
        }

        // An unterminated declaration is left in place rather than erased.
        const std::string::size_type declClose = doc.find('>', declOpen);
        if (inProlog && declClose != std::string::npos)
        {
            doc.erase(0, declClose + 1);
        }
    }
    return HTMLElement::parseElements(doc);
}
namespace
{
/** True for tag names that never have content or a closing tag. */
bool isVoidElement(const std::string &type)
{
    static const char *const kVoidElements[] = {
        "area", "base", "br", "col", "embed", "hr", "img",
        "input", "link", "meta", "source", "track", "wbr"};
    for (const char *name : kVoidElements)
    {
        if (type == name)
        {
            return true;
        }
    }
    return false;
}

/** True when c can directly follow a tag name inside an opening tag. */
bool endsTagName(char c)
{
    return c == '>' || c == '/' || c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

/**
 * Find the next opening tag whose name is exactly `type`, starting at `from`.
 * A plain prefix search is not enough: "<p" would also match "<pre".
 */
std::string::size_type findOpeningTag(const std::string &doc, const std::string &type, std::string::size_type from)
{
    const std::string prefix = "<" + type;
    std::string::size_type pos = doc.find(prefix, from);
    while (pos != std::string::npos)
    {
        const std::string::size_type after = pos + prefix.size();
        if (after >= doc.size() || endsTagName(doc[after]))
        {
            return pos;
        }
        pos = doc.find(prefix, pos + 1);
    }
    return std::string::npos;
}
} // namespace

/**
 * Create HTML Element list from string.
 *
 * Scans `doc` for sibling tags and builds one HTMLElement per tag. The text
 * between an opening tag and its matching closing tag is passed to the
 * HTMLElement constructor as the element's content.
 *
 * Handling of special cases:
 *  - comments ("<!-- -->"), other "<!...>" declarations, "<?...?>"
 *    instructions and stray closing tags are skipped;
 *  - void elements (br, img, meta, ...) and tags written as "<x ... />"
 *    get empty content and no closing tag is searched for;
 *  - nested elements of the same type are counted so the outer element is
 *    closed by its own closing tag;
 *  - when no closing tag exists, the rest of the input becomes the content;
 *  - an unterminated '<' (no '>' after it) ends parsing.
 *
 * @param doc Markup to parse.
 * @return Elements found at the top level of `doc`, in document order.
 */
std::list<HTMLElement> HTMLElement::parseElements(std::string doc)
{
    const std::string::size_type npos = std::string::npos;
    static const char *const kWhitespace = " \t\r\n\f";

    std::list<HTMLElement> elements;
    std::string::size_type cursor = 0;
    std::string::size_type tagOpen = npos;

    while ((tagOpen = doc.find('<', cursor)) != npos)
    {
        // Comments may contain '<' and '>', so skip them as a unit.
        if (doc.compare(tagOpen, 4, "<!--") == 0)
        {
            const std::string::size_type commentEnd = doc.find("-->", tagOpen + 2);
            if (commentEnd == npos)
            {
                break;
            }
            cursor = commentEnd + 2;
            continue;
        }

        const std::string::size_type tagClose = doc.find('>', tagOpen);
        if (tagClose == npos)
        {
            // A '<' with no '>' is text, not a tag. Stopping here also keeps
            // the constructor from recursing forever on the same input.
            break;
        }
        cursor = tagClose;

        // tagClose > tagOpen, so tagOpen + 1 is always a valid index.
        const char marker = doc[tagOpen + 1];
        if (marker == '!' || marker == '?' || marker == '/')
        {
            continue;
        }

        // A trailing '/' marks a self-closing tag; it is not part of the
        // name or of the attribute text.
        std::string::size_type innerEnd = tagClose;
        bool selfClosing = false;
        if (doc[innerEnd - 1] == '/')
        {
            selfClosing = true;
            --innerEnd;
        }

        // Attributes and content are per element; they must not leak from
        // one sibling to the next.
        std::map<std::string, std::string> attributes;
        std::string content;

        std::string::size_type nameEnd = doc.find_first_of(kWhitespace, tagOpen + 1);
        if (nameEnd == npos || nameEnd > innerEnd)
        {
            nameEnd = innerEnd;
        }
        else
        {
            attributes = HTMLElement::parseAttributes(doc.substr(nameEnd + 1, innerEnd - (nameEnd + 1)));
        }

        const std::string type = doc.substr(tagOpen + 1, nameEnd - (tagOpen + 1));
        if (type.empty())
        {
            continue;
        }

        if (!selfClosing && !isVoidElement(type))
        {
            const std::string closingTag = "</" + type + ">";
            std::string::size_type closePos = doc.find(closingTag, tagClose);
            std::string::size_type nestedOpen = findOpeningTag(doc, type, tagOpen + 1);

            // Every same-type opening tag seen before the current closing
            // tag consumes that closing tag, so move on to the next one.
            while (closePos != npos && nestedOpen != npos && nestedOpen < closePos)
            {
                nestedOpen = findOpeningTag(doc, type, nestedOpen + 1);
                closePos = doc.find(closingTag, closePos + 1);
            }

            if (closePos == npos)
            {
                // No matching closing tag: the remainder is the content and
                // there is nothing left to scan afterwards.
                content = doc.substr(tagClose + 1);
                cursor = doc.size();
            }
            else
            {
                content = doc.substr(tagClose + 1, closePos - (tagClose + 1));
                // Resume at the '>' that ends the closing tag.
                cursor = closePos + closingTag.size() - 1;
            }
        }

        elements.push_back(HTMLElement(type, attributes, content));
    }
    return elements;
}
/**
 * Get element attributes.
 *
 * Parses the attribute part of an opening tag (the text after the tag
 * name) into a name -> value map. Accepted forms:
 *  - name="value" and name='value' (quotes are removed);
 *  - name=value (value runs to the next whitespace);
 *  - name (no value; stored with an empty string).
 * Attributes may be separated by any amount of whitespace. When a name
 * occurs more than once the last value wins. A quoted value with no
 * closing quote takes the rest of the input.
 *
 * @param doc Attribute text, e.g. `href="x" class="y"`.
 * @return Map from attribute name to attribute value.
 */
std::map<std::string, std::string> HTMLElement::parseAttributes(std::string doc)
{
    const std::string::size_type npos = std::string::npos;
    static const char *const kWhitespace = " \t\r\n\f";
    static const char *const kNameEnd = "= \t\r\n\f";

    std::map<std::string, std::string> attributes;
    std::string::size_type pos = 0;

    // Each iteration consumes one attribute; find_first_not_of returns npos
    // once only whitespace (or nothing) is left.
    while ((pos = doc.find_first_not_of(kWhitespace, pos)) != npos)
    {
        std::string::size_type nameEnd = doc.find_first_of(kNameEnd, pos);
        if (nameEnd == npos)
        {
            nameEnd = doc.size();
        }
        const std::string key = doc.substr(pos, nameEnd - pos);

        std::string value;
        // Whitespace is allowed between the name and '='.
        pos = doc.find_first_not_of(kWhitespace, nameEnd);
        if (pos != npos && doc[pos] == '=')
        {
            pos = doc.find_first_not_of(kWhitespace, pos + 1);
            if (pos != npos)
            {
                const char first = doc[pos];
                if (first == '"' || first == '\'')
                {
                    const std::string::size_type valueEnd = doc.find(first, pos + 1);
                    if (valueEnd == npos)
                    {
                        value = doc.substr(pos + 1);
                        pos = doc.size();
                    }
                    else
                    {
                        value = doc.substr(pos + 1, valueEnd - (pos + 1));
                        pos = valueEnd + 1;
                    }
                }
                else
                {
                    std::string::size_type valueEnd = doc.find_first_of(kWhitespace, pos);
                    if (valueEnd == npos)
                    {
                        valueEnd = doc.size();
                    }
                    value = doc.substr(pos, valueEnd - pos);
                    pos = valueEnd;
                }
            }
        }

        // A stray '=' with no name in front of it is dropped.
        if (!key.empty())
        {
            attributes[key] = value;
        }
    }
    return attributes;
}
/**
* Search for elements and children of elements in list where function evaluates as true
*/
std::list<HTMLElement> HTMLElement::search(std::list<HTMLElement> list, const std::function<bool(HTMLElement)> &f)
{
std::list<HTMLElement> matches;
for (auto &&element : list)
{
if (f(element))
{
matches.push_back(element);
}
for (auto &&element : HTMLElement::search(element.children, f))
{
matches.push_back(element);
}
}
return matches;
}
/**
* Get HTML type of element
*/
std::string HTMLElement::getType()
{
return this->type;
}
/**
* Get element attributes
*/
std::map<std::string, std::string> HTMLElement::getAttributes()
{
return this->attributes;
}
/**
* Get element attributes
*/
std::string HTMLElement::getContent()
{
return this->content;
}