-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.js
173 lines (141 loc) · 5.55 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
var http = require("http");
var cheerio = require('cheerio');
var mongo = require('mongodb');
var monk = require('monk');
var db = monk('localhost:27017/pol');
var async = require('async');
// URL-encoded names of Vinmonopolet's product types, one entry per category
var productTypes = [
  "R%C3%B8dvin",
  "Hvitvin",
  "Ros%C3%A9vin",
  "Musserende+vin",
  "Fruktvin",
  "Sterkvin",
  "Brennevin",
  "%C3%98l",
  "Alkoholfritt"
];

// Timestamp taken once at start-up and logged so the run can be identified
var fetchtime = new Date();
console.log("Fetchtime: ".concat(fetchtime));
// Gets the MongoDB collection pol; the same handle is reused for every upsert
var polData = db.get('pol');

// Page numbers 1 to 200, used to navigate to each product list page.
// There are 187 pages for Red Wine by 15.09.2013.
var pages = [];
for (var i = 1; i <= 200; i++) {
  pages.push(i);
}

/**
 * Extracts the product fields from the HTML of a product detail page.
 *
 * Every field defaults to an empty string, so a page that lacks one of the
 * expected elements yields "" for that field instead of a value left over
 * from another product.
 *
 * @param {string} html - Markup of the product detail page.
 * @returns {{id: string, name: string, price: string, volume: string,
 *            type: string, choice: string, contry: string}} Raw (untrimmed)
 *          text of each field.
 */
function parseProductDetails(html) {
  var details = cheerio.load(html);
  var product = {
    id: "",
    name: "",
    price: "",
    volume: "",
    type: "",
    choice: "",
    contry: ""
  };

  // Product name
  details('.head').each(function() {
    product.name = details(this).find('h1').text();
    console.log(product.name);
  });

  // Price and volume
  details('.price').each(function() {
    product.price = details(this).find('strong').text();
    product.volume = details(this).find('em').text();
  });

  // Product detail list
  details('.productData').each(function() {
    // Product id: the .data element of the first list item
    var productIdObject = details(this).find("li").first().find('.data');
    product.id = productIdObject.text();

    // Type: the list item following the product id
    var productTypeObject = productIdObject.parent().next("li").find('.data');
    product.type = productTypeObject.text();

    // Product choice: the list item following the type
    var productChoiceObject = productTypeObject.parent().next("li").find('.data');
    product.choice = productChoiceObject.text();

    // Country: the element following the "Land/distrikt:" label
    productIdObject.parent().parent().find('.attrib').each(function() {
      if (details(this).text() === "Land/distrikt:") {
        product.contry = details(this).next().text();
      }
    });
  });

  return product;
}

/**
 * Downloads one product detail page and upserts the product into MongoDB,
 * appending the current price to its price history.
 *
 * Failures are logged and skipped; `done` is always invoked exactly once and
 * never with an error, so one bad product does not abort the whole run.
 *
 * @param {string} urlDetails - href of the product detail page.
 * @param {function()} done - Called when this product has been handled.
 */
function scrapeProduct(urlDetails, done) {
  download(urlDetails, function(data) {
    if (!data) {
      console.log("error");
      return done();
    }

    var product = parseProductDetails(data);
    var productId = product.id.trim();
    if (!productId) {
      // Without an id every such page would be merged into one document
      console.log("No product id found on " + urlDetails);
      return done();
    }

    // The price and fetch time JSON object
    var priceJson = {
      price: product.price.replace(/(\r\n|\n|\r)/gm, "").replace('Kr.', "").trim(),
      time: fetchtime
    };

    // Cut the URL at the first ";"; keep the URL unchanged when there is none
    var semicolonAt = urlDetails.indexOf(";");
    var cleanUrl = semicolonAt === -1 ? urlDetails : urlDetails.substr(0, semicolonAt);

    // Tries to update with new prices; if the product does not exist it is inserted.
    // All fields go into ONE $set: repeating the $set key in an object literal
    // keeps only the last occurrence. The "contry" field name is kept as spelled
    // in the original update document.
    polData.update(
      { _id: productId },
      {
        $set: {
          name: product.name.trim(),
          type: product.type.trim(),
          choice: product.choice.trim(),
          volume: product.volume.replace('(', "").replace(')', "").trim(),
          contry: product.contry.replace(/(\r\n|\n|\r)/gm, "").trim(),
          url: cleanUrl
        },
        $push: { prices: priceJson }
      },
      { upsert: true },
      function(err, doc) {
        if (err) {
          console.log("There was a problem adding the information to the database: " + err);
        }
        // Continue regardless of the outcome so the queue never stalls
        done();
      }
    );
  });
}

/**
 * Downloads one product list page and scrapes every product linked from it.
 *
 * @param {string} productItem - URL-encoded product type used as filter value.
 * @param {number} page - Page number of the product list.
 * @param {function()} done - Called exactly once, after all products on the
 *        page have been handled (or immediately if the page failed to load
 *        or lists no products).
 */
function scrapePage(productItem, page, done) {
  // The search query
  var url = "http://www.vinmonopolet.no/vareutvalg/sok?query=*&sort=2&sortMode=0&filterIds=25&filterValues=" + productItem + "&page=" + page;
  console.log(url);

  // Download the page of products
  download(url, function(data) {
    if (!data) {
      console.log("error");
      return done();
    }

    // The list of products for a specific product type and page as a DOM
    var list = cheerio.load(data);

    // Collect the href to the details of every product in the list
    var detailUrls = [];
    list('.product').each(function() {
      var href = list(this).attr("href");
      if (href) {
        detailUrls.push(href);
      }
    });

    // Fetch all products of this page in parallel and signal completion once
    async.forEach(detailUrls, scrapeProduct, function() {
      done();
    });
  });
}

// Loop each product type, one type at a time
async.forEachLimit(productTypes, 1, function(productItem, callback) {
  // Loop each page for this product type, at most 5 list pages in flight
  async.forEachLimit(pages, 5, function(page, callbackPages) {
    scrapePage(productItem, page, callbackPages);
  }, function(err) {
    console.log('> done product');
    // Move on to the next product type only after all its pages are finished
    callback();
  });
}, function(err) {
  console.log('> done type');
  // All work is finished; release the connection so the process can exit
  db.close();
});
// Tracks whether the database connection has already been closed, because
// the SIGINT handler's process.exit() also triggers the 'exit' handler.
var dbClosed = false;

/**
 * Closes the database connection at most once.
 */
function closeDb() {
  if (dbClosed) {
    return;
  }
  dbClosed = true;
  db.close();
}

// Runs when the process is about to exit. process.exit() is deliberately not
// called here: the process is already exiting, and forcing code 0 would hide
// a non-zero exit code from a failure.
process.on('exit', function() {
  closeDb();
  console.log('Exiting');
});

// Ctrl+C: close the connection and terminate with exit code 0.
process.on('SIGINT', function() {
  closeDb();
  console.log('Got a SIGINT. Exiting');
  process.exit(0);
});
/**
 * Utility function that downloads a URL and invokes callback with the data.
 *
 * The response body is decoded as UTF-8 by the stream itself, so a multi-byte
 * character that is split across two network chunks is still decoded
 * correctly.
 *
 * @param {string} url - The URL to fetch with an HTTP GET request.
 * @param {function(?string)} callback - Invoked exactly once: with the full
 *        response body as a string, or with null if the request or the
 *        response stream reported an error.
 */
function download(url, callback) {
  // Guard so that an error arriving after the body ended cannot trigger a
  // second callback invocation.
  var finished = false;
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  http.get(url, function(res) {
    var data = "";
    // Let the stream decode bytes to text; concatenating raw Buffers would
    // corrupt characters whose bytes span a chunk boundary.
    res.setEncoding('utf8');
    res.on('data', function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      finish(data);
    });
    res.on("error", function() {
      finish(null);
    });
  }).on("error", function() {
    finish(null);
  });
}