-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathforestutils.py
513 lines (455 loc) · 20 KB
/
forestutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#!/usr/bin/env python3
"""
Tools for analysing forest point clouds.
Inputs: a coloured pointcloud in ``.ply`` format (XYZRGB vertices), which
can be obtained by putting images from drone photography through
structure-from-motion software.
Specifically: little-endian binary format ply
Outputs (most are optional):
* A 'sparse' (i.e. canopy-only) point cloud, with most ground points discarded.
This eases further analysis, storage, etc without compromising coverage of vegetation.
* A ``.csv`` file listing identified trees, with traits including location,
height, canopy area, colour, and point count.
* Individual files containing the point cloud for each tree.
Extensive use of mutable coordinate-property mappings and streamed input
ensure that even files too large to load in memory can be processed. In extreme
cases, the resolution can be decreased to trade accuracy for memory.
Example outputs (from an older version):
`a map <https://www.google.com/maps/d/viewer?mid=z1pH7HaTWL9Q.kzQflQGYVRIU>`_,
and `pointclouds <http://phenocam.org.au/pointclouds>`_.
"""
# pylint:disable=unsubscriptable-object
# log file name base - the log file will be this + the current time + '.log'
LOG_NAME = 'forestutils'
import argparse
import csv
import math
import os
import datetime
import logging
from typing import MutableMapping, NamedTuple, Tuple, Set
import utm # type: ignore
from . import pointcloudfile
# User-defined types
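# XY_Coord is an integer (x, y) grid-cell index; it is the key type of the
# per-cell mappings (Coord_Labels) used throughout this module.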
XY_Coord = NamedTuple('XY_Coord', [('x', int), ('y', int)])
Coord_Labels = MutableMapping[XY_Coord, int]
def coords(pos):
    """
    Map a point to the integer grid cell that contains it.

    The returned XY_Coord is used as the dictionary key for every per-cell
    attribute map.

    * pos must expose ``.x`` and ``.y`` attributes (a full point tuple or
      an XY pair both work as long as they are named tuples)
    * the cell edge length is read from the global ``args.cellsize``
    * floor() is used so that imprecise floats still land in a stable cell
    """
    cell = args.cellsize
    return XY_Coord(math.floor(pos.x / cell), math.floor(pos.y / cell))
def neighbors(key: XY_Coord) -> Tuple[XY_Coord, ...]:
    """
    Return the eight keys surrounding an XY coordinate key.

    The keys are produced whether or not they exist in any map; the centre
    key itself is never included.
    """
    adjacent = []
    for dx in (-1, 0, 1):
        for dy in (-1, 0, 1):
            if dx == 0 and dy == 0:
                # Skip the cell itself.
                continue
            adjacent.append(XY_Coord(key.x + dx, key.y + dy))
    return tuple(adjacent)
def connected_components(input_dict: 'Coord_Labels') -> None:
    """
    Label the connected components in a dict of coordinates, in place.

    Two keys are connected when they are 8-neighbours (they differ by at
    most one in both x and y). After the call every key of a component
    carries the smallest label that any member of that component started
    with. Non-component cells are simply absent from the input (a value of
    None is treated as absent too).

    The search is an iterative depth-first flood fill with an explicit
    stack, so arbitrarily large components cannot exhaust the interpreter's
    recursion limit and labels are always fully propagated in one call.

    Args:
        input_dict: mapping of (x, y) keys to integer labels; modified in
            place.
    """
    # The same 8-neighbourhood that neighbors() describes, as offsets.
    offsets = tuple((dx, dy)
                    for dx in (-1, 0, 1) for dy in (-1, 0, 1) if dx or dy)
    seen = set()
    for start in tuple(input_dict):
        if start in seen or input_dict[start] is None:
            continue
        seen.add(start)
        component = [start]
        stack = [start]
        while stack:
            x, y = stack.pop()
            for dx, dy in offsets:
                # A plain tuple hashes and compares equal to an XY_Coord.
                nbr = (x + dx, y + dy)
                if nbr in seen or input_dict.get(nbr) is None:
                    continue
                seen.add(nbr)
                component.append(nbr)
                stack.append(nbr)
        label = min(input_dict[k] for k in component)
        for k in component:
            # Assigning through an equal key keeps the original key object.
            input_dict[k] = label
def detect_issues(ground_dict: Coord_Labels, prior: set) -> Set[XY_Coord]:
    """
    Identify cells with more than 2:1 slope to 3+ adjacent cells.

    Greater than 2:1 slope is suspiciously steep; 3+ usually indicates a
    misclassified cell or data artefact.

    Args:
        ground_dict: ground height for each known cell.
        prior: the candidate cells to examine; every one must be a key of
            ``ground_dict``.

    Returns:
        The subset of ``prior`` that has at least six known neighbours, of
        which at least three differ in height by more than twice the global
        ``args.cellsize``.
    """
    steep = 2 * args.cellsize
    problematic = set()
    for k in prior:
        # Keep one height per neighbouring cell. A list is used rather than
        # a set so that neighbours sharing the same height are each counted.
        heights = [h for h in (ground_dict.get(n) for n in neighbors(k))
                   if h is not None]
        if len(heights) < 6:
            # Too few known neighbours to judge this cell.
            continue
        probs = sum(abs(ground_dict[k] - h) > steep for h in heights)
        if probs >= 3:
            problematic.add(k)
    return problematic
def smooth_ground(ground_dict: Coord_Labels) -> None:
    """
    Smooth the ground map in place to damp spurious points, eg. points far
    underground or canopy misclassified as ground.

    Runs 100 passes. Each pass re-examines the cells flagged by the previous
    pass (initially every cell) with detect_issues, then resets every
    flagged cell to the lowest unflagged known neighbour plus twice the
    global ``args.cellsize``. Flagged cells with no such neighbour are left
    untouched.
    """
    logging.info('Smoothing the ground map.')
    suspects = set(ground_dict)
    passes_done = 0
    while passes_done < 100:
        passes_done += 1
        suspects = detect_issues(ground_dict, suspects)
        for cell in suspects:
            trusted = set()
            for nbr in neighbors(cell):
                if nbr in suspects:
                    continue
                height = ground_dict.get(nbr)
                if height is not None:
                    trusted.add(height)
            if trusted:
                ground_dict[cell] = min(trusted) + 2*args.cellsize
class MapObj:
    """
    Per-cell summary maps of a point cloud on a grid of ``args.cellsize``.

    The data structure is a set of dictionaries, one per attribute, each
    keyed by the XY_Coord that ``coords`` returns for a point:

    * ``canopy`` / ``ground``: highest / lowest z read in the cell (the
      ground map is smoothed after reading)
    * ``density``: number of points read in the cell
    * ``filtered_density``: number of non-ground points counted for the
      cell by ``update_colours``
    * ``colours``: running total of every non-spatial vertex attribute over
      the cell's non-ground points
    * ``trees``: connected-component label of each cell that is part of a
      tree
    """
    # pylint:disable=too-many-instance-attributes

    # Vertex attributes that are positions rather than colours.
    _SPATIAL_FIELDS = ('x', 'y', 'z')

    def __init__(self, input_file, *, colours=True):
        """
        Read ``input_file`` and build the per-cell maps.

        Args:
            input_file (path): the ``.ply`` file to process. If dealing with
                Pix4D outputs, ``*_part_1.ply``.
            colours (bool): whether to read colours from the file. Set to
                False for eg. LIDAR data where mean colour is not useful.

        The UTM zone and hemisphere are taken from the global ``args``
        (``args.utmzone`` and ``args.north``).
        """
        logging.debug('Create a MapObj')
        self.file = input_file
        self.canopy = dict()
        self.density = dict()
        self.filtered_density = dict()
        self.ground = dict()
        self.colours = dict()
        self.trees = dict()
        self.header = pointcloudfile.parse_ply_header(
            pointcloudfile.ply_header_text(input_file))
        logging.info('Moving x,y by utm offset by calling pointcloudfile.offset_for({})'.format(input_file))
        x, y, _ = pointcloudfile.offset_for(input_file)
        self.utm = pointcloudfile.UTM_Coord(x, y, args.utmzone, args.north)
        self.update_spatial()
        if colours:
            self.update_colours()

    def update_spatial(self):
        """
        Read every point of the file into the spatial maps.

        For each cell this records the point count (``density``) and the
        highest and lowest z (``canopy`` and ``ground``).
        ``filtered_density`` starts at zero for every cell; it is only
        incremented by ``update_colours``. Afterwards the ground map is
        smoothed and the tree labels are recomputed.
        """
        for p in pointcloudfile.read(self.file):
            idx = coords(p)
            if self.density.get(idx) is None:
                # First point seen in this cell.
                self.density[idx] = 1
                self.canopy[idx] = p.z
                self.ground[idx] = p.z
                # No non-ground point has been counted yet; starting at 1
                # would bias every mean colour towards zero.
                self.filtered_density[idx] = 0
                continue
            self.density[idx] += 1
            if self.ground[idx] > p.z:
                self.ground[idx] = p.z
            elif self.canopy[idx] < p.z:
                self.canopy[idx] = p.z
        smooth_ground(self.ground)
        self.trees = self._tree_components()

    def update_colours(self):
        """
        Accumulate colour totals from the non-ground points of the file.

        We assume that vertex attributes not named "x", "y" or "z" are
        colours, and thus accumulate a per-cell total from which a mean can
        be derived later.
        """
        for p in pointcloudfile.read(self.file):
            if self.is_ground(p):
                continue
            p_cols = {k: v for k, v in p._asdict().items()
                      if k not in self._SPATIAL_FIELDS}
            idx = coords(p)
            # filtered_density is the number of points in the cell after
            # the ground has been removed.
            self.filtered_density[idx] += 1
            if idx not in self.colours:
                self.colours[idx] = p_cols
            else:
                for k, v in p_cols.items():
                    self.colours[idx][k] += v

    def is_ground(self, point) -> bool:
        """
        Return whether the point is classified as ground, i.e. True if it
        is less than ``args.grounddepth`` above the ground height of its
        cell.
        """
        return point[2] - self.ground[coords(point)] < args.grounddepth

    def is_lowest(self, point) -> bool:
        """
        Return whether the point's height equals the ground height recorded
        for its grid cell.
        """
        return point[2] == self.ground[coords(point)]

    def __len__(self) -> int:
        """Total observed points."""
        return sum(self.density.values())

    def _tree_components(self) -> 'Coord_Labels':
        """
        Return a dict mapping cell keys to connected-component labels.

        NB: Not all keys in other dicts exist in this output; only cells
        whose canopy is more than ``args.slicedepth`` above the ground are
        labelled.
        """
        # Group the qualifying cells under coarser keys, each covering
        # args.joinedcells cells per side, and search at that scale.
        key_scale_record = {}
        for key in self.density:
            if self.canopy[key] - self.ground[key] > args.slicedepth:
                cc_key = XY_Coord(int(math.floor(key.x / args.joinedcells)),
                                  int(math.floor(key.y / args.joinedcells)))
                key_scale_record.setdefault(cc_key, set()).add(key)
        # Assign a unique integer value to each large key, then search.
        # Final labels are non-negative ints, not necessarily consecutive.
        trees = {k: i for i, k in enumerate(key_scale_record)}
        connected_components(trees)
        # Copy labels to grid of original scale
        return {s: trees[k] for k, v in key_scale_record.items() for s in v}

    def _mean_colours(self, keys) -> dict:
        """
        Return the mean of each colour attribute over all the given cells.

        Totals and non-ground point counts are summed over every cell that
        has colour data, so the result is the mean over the points rather
        than the value of whichever cell happened to be visited last. Cells
        without colour data are ignored; an empty dict is returned when no
        point was counted.
        """
        totals = {}
        count = 0
        for k in keys:
            cell = self.colours.get(k)
            if not cell:
                continue
            count += self.filtered_density.get(k, 0)
            for name, total in cell.items():
                totals[name] = totals.get(name, 0) + total
        if not count:
            return {}
        return {name: total / count for name, total in totals.items()}

    def tree_data(self, keys: 'Set[XY_Coord]') -> dict:
        """
        Return a dictionary of data about the tree in the given keys.

        Args:
            keys: the non-empty set of cells that make up one tree.

        Returns:
            A dict with the position (latitude/longitude and UTM), height,
            area, mean base altitude and point count of the tree, plus the
            mean of every colour attribute when colours have been read.
        """
        # Calculate positional information
        x = self.utm.x + args.cellsize * sum(k.x for k in keys) / len(keys)
        y = self.utm.y + args.cellsize * sum(k.y for k in keys) / len(keys)
        lat, lon = utm.to_latlon(x, y, self.utm.zone, northern=self.utm.north)
        out = {
            'latitude': lat,
            'longitude': lon,
            'UTM_X': x,
            'UTM_Y': y,
            'UTM_zone': args.utmzone,
            'height': 0,
            'area': len(keys) * args.cellsize**2,
            'base_altitude': sum(self.ground[k] for k in keys) / len(keys),
            'point_count': 0,
        }
        for k in keys:
            out['height'] = max(out['height'], self.canopy[k] - self.ground[k])
            out['point_count'] += self.density[k]
        out.update(self._mean_colours(keys))
        return out

    def all_trees(self):
        """
        Yield the characteristics of each tree.

        Use to iterate over the trees. Trees are visited in ascending label
        order, and only those taller than 1.5 times ``args.slicedepth`` are
        yielded.
        """
        groups = {}
        for k, v in self.trees.items():
            if v is None:
                continue
            groups.setdefault(v, set()).add(k)
        for label in sorted(groups):
            data = self.tree_data(groups[label])
            if data['height'] > 1.5 * args.slicedepth:
                # Filter trees by height
                yield data

    def save_sparse_cloud(self, new_fname, lowest=True, canopy=True):
        """
        Write a sparse point cloud to ``new_fname``.

        Args:
            new_fname: path of the file to write.
            lowest: keep the lowest point of every cell.
            canopy: keep every point that is not classified as ground.

        When both flags are set, the written file becomes the file that
        later reads of this object use.
        """
        newpoints = (point for point in pointcloudfile.read(self.file)
                     if (canopy and not self.is_ground(point)) or
                     (lowest and self.is_lowest(point)))
        pointcloudfile.write(newpoints, new_fname, self.header, self.utm)
        if lowest and canopy:
            self.file = new_fname

    def save_individual_trees(self):
        """
        Save single trees to pointcloud files, if the 'savetrees' flag is set.

        Uses the directory specified by the savetrees flag, creating it when
        it does not exist.

        Raises:
            IOError: if the savetrees path exists and is a file.
        """
        if not args.savetrees:
            return
        if os.path.isfile(args.savetrees):
            error = 'Output dir for trees is a file; a directory is required.'
            logging.error(error)
            raise IOError(error)
        if not os.path.isdir(args.savetrees):
            os.makedirs(args.savetrees)
        # Map tree ID numbers to an incremental writer for that tree
        tree_to_file = {tree_ID: pointcloudfile.IncrementalWriter(
            os.path.join(args.savetrees, 'tree_{}.ply'.format(tree_ID)),
            self.header, self.utm) for tree_ID in set(self.trees.values())}
        # For every point in a labelled cell, find the appropriate writer
        # and call it with the point
        for point in pointcloudfile.read(self.file):
            val = self.trees.get(coords(point))
            if val is not None:
                tree_to_file[val](point)

    def stream_analysis(self, csv_filename: str) -> None:
        """
        Save the list of trees with attributes to the file 'csv_filename'.
        """
        logging.info('Write the tree data to the csv file "{}"'.format(csv_filename))
        header = ('latitude', 'longitude', 'UTM_X', 'UTM_Y', 'UTM_zone',
                  'height', 'area', 'base_altitude', 'point_count') + tuple(
                      a for a in self.header.names
                      if a not in self._SPATIAL_FIELDS)
        with open(csv_filename, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=header)
            writer.writeheader()
            for data in self.all_trees():
                writer.writerow(data)
def get_args(argv=None):
    """
    Handle command-line arguments, including default values.

    Args:
        argv: optional list of argument strings to parse instead of the
            process command line; the default (None) makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The argparse namespace with attributes ``file``, ``out``,
        ``savetrees``, ``cellsize``, ``utmzone``, ``north``,
        ``joinedcells``, ``slicedepth`` and ``grounddepth``.
    """
    parser = argparse.ArgumentParser(
        # Adjacent literals are concatenated verbatim, so each fragment
        # carries its own trailing space.
        description=('Takes a .ply forest point cloud; outputs a sparse '
                     'point cloud and a .csv file of attributes '
                     'for each tree.'))
    parser.add_argument(
        'file', help='name of the file to process', type=str)
    parser.add_argument(
        'out', default='.', nargs='?', type=str,
        help='directory for output files (optional)')
    parser.add_argument(
        '--savetrees', default='', nargs='?', type=str,
        help='where to save individual trees (default "", not saved)')
    parser.add_argument(  # analysis scale
        '--cellsize', default=0.1, nargs='?', type=float,
        help='grid scale; optimal at ~10x point spacing')
    parser.add_argument(  # georeferenced location
        '--utmzone', default=55, type=int,
        help='the UTM coordinate zone for georeferencing')
    parser.add_argument(  # georeferenced location
        '--north', action='store_true',
        help='set if in the northern hemisphere')
    parser.add_argument(  # feature extraction
        '--joinedcells', default=3, type=float,
        help='use cells X times larger to detect gaps between trees')
    parser.add_argument(  # feature extraction
        '--slicedepth', default=0.6, type=float,
        help='slice depth for canopy area and feature extraction')
    parser.add_argument(  # feature classification
        '--grounddepth', default=0.2, type=float,
        help='depth to omit from sparse point cloud')
    return parser.parse_args(argv)
def main_processing():
    """
    Logic on which functions to call, and efficient order.

    Reads the input named by the global ``args``, writes (or reuses) the
    sparse point cloud, writes the per-tree csv analysis next to it and,
    when a savetrees directory was given, the individual tree pointclouds.
    Progress is both printed and logged.
    """
    # args is a global variable
    print('Reading from "{}" ...'.format(args.file))
    logging.info('Reading from "{}" ...'.format(args.file))

    # File I/O
    # sparse_filename is a string containing the name of the main output file
    # Set output file name to <input file name>_sparse.ply
    sparse_filename = os.path.join(args.out, os.path.basename(args.file))
    if not args.file.endswith('_sparse.ply'):
        sparse_filename = os.path.join(
            args.out, os.path.basename(args.file)[:-4] + '_sparse.ply')
        sparse_filename = sparse_filename.replace('_part_1', '')

    # NOTE(review): confirm why an existing sparse file is reused - on a
    # re-run the sparse file already exists, but when the object is created
    # from the sparse filename, the .xyz filename is wrong.
    if os.path.isfile(sparse_filename):
        logging.info('"sparse" file already exist, using this file')
        attr_map = MapObj(sparse_filename)
        print('Read {} points into {} cells'.format(
            len(attr_map), len(attr_map.canopy)))
        logging.info('Read {} points into {} cells'.format(
            len(attr_map), len(attr_map.canopy)))
    else:
        # Colours are read afterwards from the (smaller) sparse file.
        attr_map = MapObj(args.file, colours=False)
        print('Read {} points into {} cells, writing "{}" ...'.format(
            len(attr_map), len(attr_map.canopy), sparse_filename))
        logging.info('Read {} points into {} cells, writing "{}" ...'.format(
            len(attr_map), len(attr_map.canopy), sparse_filename))
        attr_map.save_sparse_cloud(sparse_filename)
        print('Reading colours from ' + sparse_filename)
        logging.info('Reading colours from {}'.format(sparse_filename))
        attr_map.update_colours()
    print('File IO complete, starting analysis...')
    logging.info('File IO complete, starting analysis...')

    # table is a string containing the name of the csv file to save tree data in
    table = '{}_analysis.csv'.format(sparse_filename[:-4].replace('_sparse', ''))
    # write the tree data to a csv file
    logging.info('Calling stream_analysis to write the csv file')
    attr_map.stream_analysis(table)

    # save pointclouds for individual trees; the default '' (and None, from
    # a bare --savetrees) both mean "do not save", so nothing is announced
    if args.savetrees:
        print('Saving individual trees...')
        logging.info('Saving individual trees')
        attr_map.save_individual_trees()
    print('Done.')
    logging.info('Done.')
def logging_setup():
    """
    Start the execution log and define the layout of its records.

    The current time is printed, and the log is written to a file named
    LOG_NAME + '-' + <date hour-minute> + '.log'.
    """
    now = datetime.datetime.now()
    print(str(now))
    # Filename-friendly timestamp: no ':' and nothing finer than minutes.
    stamp = now.strftime('%Y-%m-%d %H-%M')
    logfilename = '{}-{}.log'.format(LOG_NAME, stamp)

    # change the level to logging.DEBUG to see all the debugging messages
    logging.basicConfig(
        filename=logfilename,
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)s - %(funcName)s: %(message)s',
        datefmt="%Y-%m-%d %H:%M",
    )
    logging.info('Started forestutils.')
    logging.debug(' logging_setup: created a log file')
def main():
    """
    Entry point for callers outside the package.

    Starts the log, parses the command line into the global ``args``,
    validates the input and output paths up front, then runs the main
    processing.

    Raises:
        IOError: if the input file is missing, the output directory is not
            a directory, or the savetrees path is an existing file.
    """
    # pylint:disable=global-statement
    global args
    print('Welcome to forestutils 3D tree mapping program.')
    # the execution log is for info and/or debugging
    logging_setup()
    args = get_args()

    # IO checks happen now, BEFORE doing all the processing.
    input_missing = not os.path.isfile(args.file)
    if input_missing:
        logging.error('Input file not found.')
        raise IOError('Input file not found, ' + args.file)

    output_invalid = not os.path.isdir(args.out)
    if output_invalid:
        logging.error('Output directory is not valid.')
        raise IOError('Output directory is not valid, ' + args.out)

    # A savetrees path that already exists as a file can never become the
    # directory that is required.
    if args.savetrees is not None and os.path.isfile(args.savetrees):
        logging.error('Output dir for trees is a file; a directory is required.')
        raise IOError('Output dir for trees is a file; a directory is required.')

    logging.info('Commencing main processing function.')
    main_processing()
if __name__ == '__main__':
    # Script entry point. The call to get_args is duplicated here (main()
    # parses the arguments again and rebinds the global `args`) so that the
    # module-level name exists for static analysis, when run from the
    # command line, and when installed as a package (which calls main
    # directly).
    args = get_args()
    main()