-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebcrawler.cpp
118 lines (105 loc) · 3.25 KB
/
webcrawler.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <iostream>
#include <string.h>
#include <string>
#include <fstream>
#include <sstream>
#include <deque>
#include <set>
#include "HTTPSSession.h"
#include "HTMLElement.h"
// Host name and port handed to the HTTPSSession constructor in main()
#define HOST "project2.5700.network"
#define PORT "443"
/**
 * Predicate: true when the element's type is "a" (an anchor tag).
 * Used as a filter for HTMLElement::search.
 */
bool isLink(HTMLElement element)
{
    const bool isAnchor = (element.getType() == "a");
    return isAnchor;
}
/**
 * Predicate: true when the element is an "input" whose "name" attribute
 * equals "csrfmiddlewaretoken".
 */
bool isCSRF(HTMLElement element)
{
    // Anything that is not an <input> cannot be the token field;
    // attributes are only inspected once the type matches.
    if (!(element.getType() == "input"))
    {
        return false;
    }
    const bool isTokenField = (element.getAttributes()["name"] == "csrfmiddlewaretoken");
    return isTokenField;
}
/**
 * Predicate: true when the element's content contains the text "FLAG: "
 * at any position.
 */
bool isSecret(HTMLElement element)
{
    const auto markerPos = element.getContent().find("FLAG: ");
    const bool hasMarker = (markerPos != std::string::npos);
    return hasMarker;
}
/**
* Main script for web crawler
*/
int main(int argc, char const *argv[])
{
// Check args
if (argc != 3)
{
std::cout << "Usage: ./webcrawler [username] [password]" << std::endl;
exit(1);
}
// Create and connect HTTPS session
HTTPSSession session(HOST, PORT);
// Login to Fakebook
std::ostringstream loginPost;
HTTPResponseMessage res = session.get("/fakebook/");
std::list<HTMLElement> html = HTMLElement::parseDocument(res.getData());
std::list<HTMLElement> out = HTMLElement::search(html, isCSRF);
std::string csrf = (*(out.begin())).getAttributes()["value"];
loginPost << "username=" << argv[1] << "&password=" << argv[2] << "&csrfmiddlewaretoken=" << csrf << "&next=%2Ffakebook%2F";
res = session.post("/accounts/login/?next=/fakebook/", loginPost.str(), "application/x-www-form-urlencoded");
// Crawl Fakebook for secret tags
int count = 0;
std::deque<std::string> toCheck;
std::set<std::string> checked, found;
toCheck.push_back("/fakebook/");
checked.insert("/");
found.insert("/");
checked.insert("/accounts/logout/");
found.insert("/accounts/logout/");
while (count < 5 && !toCheck.empty())
{
// Get first path in queue
std::string path = toCheck.front();
toCheck.pop_front();
// Make GET request
res = session.get(path);
// Skip if 400
if (res.getStatus() == 403 || res.getStatus() == 404)
{
continue;
}
// Try path again if 500
if (res.getStatus() == 500)
{
toCheck.push_front(path);
continue;
}
// Path checked
checked.insert(path);
// Parse HTML Document
html = HTMLElement::parseDocument(res.getData());
// Check for links in HTML
std::list<HTMLElement> links = HTMLElement::search(html, isLink);
for (auto &&element : links)
{
std::string newPath = element.getAttributes()["href"];
if (newPath[0] == '/' && checked.count(newPath) == 0 && found.count(newPath) == 0)
{
toCheck.push_back(newPath);
found.insert(newPath);
}
}
// Check for secret flags in HTML
std::list<HTMLElement> secrets = HTMLElement::search(html, isSecret);
for (auto &&element : secrets)
{
std::cout << element.getContent().substr(6) << std::endl;
count++;
}
};
return 0;
}