-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsenderos.py
98 lines (82 loc) · 3.55 KB
/
senderos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python
# -*- coding: utf-8 -*-
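# NOTE: Python honours a coding declaration only on line 1 or 2 of the file (PEP 263),
# so this source is read as UTF-8; further "encoding: ..." comments here have no effect.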
import re
import sys
import urllib.request
# Show which encoding stdout uses before any scraped text is printed.
print(sys.stdout.encoding)
# Prefix that every extracted page name is appended to.
SENDEROS_BASE_URL = "http://www.webtenerife.com/que-hacer/naturaleza/senderismo/senderos/"
# Listing page scanned for "map-data" tokens (query string requests page-index=1, list view).
SENDEROS_LISTADO_URL = (
    SENDEROS_BASE_URL
    + "?gclid=CMfX7qWrotICFYc_GwodZi4LyQ&page-index=1&tab-view-mode=listado"
)


def cargar_links(url=SENDEROS_LISTADO_URL, base_url=SENDEROS_BASE_URL):
    """Return the page URLs referenced by "map-data" tokens on the listing page.

    The page at ``url`` is downloaded and split on whitespace. For every token
    containing ``map-data``, the text after the last ``/`` and before the first
    ``"`` is taken as a page name, ``aspx`` is replaced by ``htm`` and the
    result is appended to ``base_url``.

    Args:
        url: Listing page to scan. Defaults to the webtenerife listing.
        base_url: Prefix prepended to every extracted page name.

    Returns:
        A list of URL strings, in the order the tokens appear on the page.
        Empty if no token contains ``map-data``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the body does not match the declared charset.
    """
    # The context manager closes the response even if decoding fails.
    with urllib.request.urlopen(url) as respuesta:
        # get_content_charset() returns None when the server declares no
        # charset; decode(None) would raise TypeError, so fall back to UTF-8.
        charset = respuesta.headers.get_content_charset() or "utf-8"
        pagina = respuesta.read().decode(charset)

    enlaces = []
    for token in pagina.split():
        if "map-data" not in token:
            continue
        # e.g. map-data-url="/a/b/nombre.aspx">  ->  nombre.aspx
        nombre = token.rsplit("/", 1)[-1].split('"', 1)[0]
        enlaces.append(base_url + nombre.replace("aspx", "htm"))
    return enlaces
# Labels looked up in each trail page, in the order their values are emitted.
# "Duraci" is a deliberate prefix match (presumably to avoid the accented
# character that follows it on the page -- confirm against the site markup).
ETIQUETAS_SENDERO = (
    "Inicio:",
    "Fin:",
    "Grado de dificultad:",
    "Tipo de recorrido:",
    "Distancia:",
    "Duraci",
)


def descargar_pagina(url):
    """Download ``url`` once and return its body decoded to text."""
    with urllib.request.urlopen(url) as respuesta:
        # No declared charset -> get_content_charset() is None; use UTF-8
        # instead of crashing in decode(None).
        charset = respuesta.headers.get_content_charset() or "utf-8"
        return respuesta.read().decode(charset)


def extraer_datos_sendero(html):
    """Extract the labelled values and ``meta itemprop`` contents from a page.

    For each label in ``ETIQUETAS_SENDERO`` (in that order), every
    ``<strong>``-delimited chunk containing the label contributes the text
    between ``</strong>`` and the next ``</li>``, left-stripped. Afterwards,
    every tag containing ``meta itemprop=`` contributes the value of its
    ``content="..."`` attribute.

    Chunks lacking ``</strong>`` or ``content="`` are skipped rather than
    raising ``IndexError``.

    Args:
        html: Decoded HTML of one trail page.

    Returns:
        A list of strings; its length depends on how many labels and meta
        tags the page actually contains.
    """
    datos = []

    bloques = html.split("<strong>")
    for etiqueta in ETIQUETAS_SENDERO:
        for bloque in bloques:
            if etiqueta not in bloque:
                continue
            partes = bloque.split("</strong>")
            if len(partes) < 2:
                continue
            datos.append(partes[1].split("</li>")[0].lstrip())

    for trozo in html.split("<"):
        if "meta itemprop=" not in trozo:
            continue
        partes = trozo.split('content="')
        if len(partes) < 2:
            continue
        datos.append(partes[1].split('"')[0].lstrip())

    return datos


def formatear_fila(campos):
    """Render ``campos`` as one output row: the list's repr without brackets.

    NOTE(review): this is not real CSV -- values keep Python repr quoting,
    commas inside values are not escaped, and any "[" or "]" inside a value
    is removed too. The format is kept so new rows match rows already
    appended to the file; consider the ``csv`` module if consumers allow it.
    """
    return str(list(campos)).replace("[", "").replace("]", "")


def main():
    """Scrape every trail page and append one row per page to lib/output.csv."""
    # Emitted before any page is fetched, matching the original statement order.
    print("Done")
    for enlace in cargar_links():
        # One download per page; the HTML is parsed for both field kinds.
        campos = extraer_datos_sendero(descargar_pagina(enlace))
        with open('lib/output.csv', 'a') as salida:
            print(len(campos))
            salida.write(formatear_fila(campos))
            salida.write("\n")


if __name__ == "__main__":
    main()