-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmoreland_fetcher.rb
executable file
·84 lines (76 loc) · 2.37 KB
/
moreland_fetcher.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/ruby -rrubygems
require 'nokogiri'
require 'fetcher'
require 'moreland'
# Locates the planning-permit PDFs linked from the Moreland council page and
# converts them to plain text with the external `pdftotext` program.
#
# `get_data` and `get_cache_file` are not defined in this file; presumably
# they are supplied by the Fetcher module extended below -- confirm there.
module MorelandFetcher
  extend Fetcher

  URL = 'http://moreland.vic.gov.au/building-and-planning/planning/planning-permit-applications.html'

  # Returns the Pathname of the text rendition of +pdf_file+, creating it
  # with `pdftotext -layout` when it does not exist yet.
  #
  # pdf_file - String or Pathname naming the PDF.
  #
  # The text file sits next to the PDF, with the final extension replaced by
  # ".txt" (or ".txt" appended when the file name has no extension). The
  # path is returned even if the conversion failed; a warning is printed in
  # that case.
  def self.pdf_to_txt_file(pdf_file)
    pdf_path = Pathname.new(pdf_file.to_s)
    # sub_ext only touches the extension of the last path component, so dots
    # in directory names (e.g. "dir.v1/file") cannot truncate the path.
    txt_file = pdf_path.sub_ext('.txt')
    unless txt_file.exist?
      puts "Converting to text #{txt_file} ..."
      # Multi-argument system bypasses the shell, so characters such as
      # $, ` or quotes in a scraped file name are passed through literally
      # instead of being interpreted.
      converted = system('pdftotext', '-layout', pdf_path.to_s, txt_file.to_s)
      warn "pdftotext failed for #{pdf_path}" unless converted
    end
    txt_file
  end

  # Fetches URL, takes the href of every <a> whose href contains ".pdf",
  # passes each through get_cache_file and converts the result to text.
  #
  # Returns an Array of Pathnames of the text files, in document order.
  def self.fetch_files
    doc = Nokogiri.parse(get_data(URL))
    doc.xpath("//a[contains(@href, '.pdf')]/@href").map { |href|
      pdf_to_txt_file(get_cache_file(href.value))
    }
  end
end
# Script entry point: only runs when this file is executed directly.
if __FILE__ == $0
  # Work list: with command-line arguments, each one is treated as a path
  # (".pdf" files are converted to text first, anything else is used as
  # given); with no arguments, every PDF linked from the council page is
  # fetched and converted.
  files = if ARGV.empty?
            MorelandFetcher.fetch_files
          else
            ARGV.map { |arg|
              path = Pathname.new(arg)
              path.extname == '.pdf' ? MorelandFetcher.pdf_to_txt_file(path) : path
            }
          end

  # XXX only saves the ones that have a current_ward XXX
  files.each { |f|
    # Match against the String form of the path: Regexp#=== returns false
    # for objects that are neither String nor Symbol and have no to_str
    # (Pathname has none on Ruby >= 1.9), which would send every file to
    # the "unhandled" branch.
    #
    # Clause order matters: the directory-qualified "006 planning decisions"
    # patterns must be tried before the generic one.
    # NOTE(review): the 08..../090.../09100x prefixes look like dated
    # download directories whose files need the Decided2 parser -- confirm.
    report = case f.to_s
             when /planning-permit-received.txt/,
                  /003(\s|%20)qryplanpermitappnrecdbyward.txt/
               Received
             when /008(\s|%20)planningpermitsinprogress.txt/,
                  /planning-permit-progress.txt/
               Updated
             when /014(\s|%20)subdivision(\s|%20)certifications(\s|%20)in(\s|%20)progress.txt/,
                  /planning-permit-subdivision-certification-progress.txt/
               UpdatedSubdivisions
             when /planning-permit-advertised.txt/,
                  /002(\s|%20)rptplanpermitappnadvbyward.txt/,
                  /002 permitappnadvbyward.txt/
               Advertised
             when /planning-permit-determination.txt/,
                  /08....\/006(\s|%20)planning(\s|%20)decisions.txt/,
                  /090...\/006(\s|%20)planning(\s|%20)decisions.txt/,
                  /09100(2|5)\/006(\s|%20)planning(\s|%20)decisions.txt/
               Decided2
             when /006(\s|%20)planning(\s|%20)decisions.txt/
               Decided
             when /planning-permit-subdivision-certification-determination.txt/,
                  /013(\s|%20)subdivision(\s|%20)certification(\s|%20)decisions.txt/
               DecidedSubdivisions
             else
               # warn returns nil, so report stays nil and the file is skipped.
               warn "ERROR unhandled file: #{f}"
             end
    if report
      puts r = report.parse(f)
      #r.save
    end
  }
end