 import org.apache.accumulo.core.clientImpl.bulk.Bulk.Files;
 import org.apache.accumulo.core.clientImpl.bulk.BulkSerialize;
 import org.apache.accumulo.core.clientImpl.bulk.LoadMappingIterator;
+import org.apache.accumulo.core.conf.AccumuloConfiguration;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.dataImpl.KeyExtent;

 import org.apache.accumulo.server.fs.VolumeManager;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
+import org.apache.thrift.TApplicationException;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -115,7 +117,7 @@ public long isReady(long tid, Manager manager) throws Exception {

     Loader loader;
     if (bulkInfo.tableState == TableState.ONLINE) {
-      loader = new OnlineLoader();
+      loader = new OnlineLoader(manager.getConfiguration());
     } else {
       loader = new OfflineLoader();
     }
@@ -158,112 +160,149 @@ void start(Path bulkDir, Manager manager, long tid, boolean setTime) throws Exce

   private static class OnlineLoader extends Loader {

+    private final int maxConnections;
     long timeInMillis;
     String fmtTid;
     int locationLess = 0;

-    // track how many tablets were sent load messages per tablet server
-    MapCounter<HostAndPort> loadMsgs;
+    int tabletsAdded;

     // Each RPC to a tablet server needs to check in zookeeper to see if the transaction is still
     // active. The purpose of this map is to group load request by tablet servers inorder to do less
     // RPCs. Less RPCs will result in less calls to Zookeeper.
     Map<HostAndPort,Map<TKeyExtent,Map<String,MapFileInfo>>> loadQueue;
     private int queuedDataSize = 0;

+    public OnlineLoader(AccumuloConfiguration configuration) {
+      super();
+      this.maxConnections = configuration.getCount(Property.MANAGER_BULK_MAX_CONNECTIONS);
+    }
+
     @Override
     void start(Path bulkDir, Manager manager, long tid, boolean setTime) throws Exception {
       super.start(bulkDir, manager, tid, setTime);

       timeInMillis = manager.getConfiguration().getTimeInMillis(Property.MANAGER_BULK_TIMEOUT);
       fmtTid = FateTxId.formatTid(tid);

-      loadMsgs = new MapCounter<>();
+      tabletsAdded = 0;

       loadQueue = new HashMap<>();
     }

-    private static final int MAX_CONNECTIONS_PER_TSERVER = 8;
+    private static class Client {
+      final HostAndPort server;
+      final TabletClientService.Client service;
+
+      private Client(HostAndPort server, TabletClientService.Client service) {
+        this.server = server;
+        this.service = service;
+      }
+    }

     private void sendQueued(int threshhold) {
       if (queuedDataSize > threshhold || threshhold == 0) {
         var sendTimer = Timer.startNew();

-        List<TabletClientService.Client> clients = new ArrayList<>();
+        List<Client> clients = new ArrayList<>();
+        try {

-        // Send load messages to tablet servers spinning up work, but do not wait on results.
-        loadQueue.forEach((server, tabletFiles) -> {
+          // Send load messages to tablet servers spinning up work, but do not wait on results.
+          loadQueue.forEach((server, tabletFiles) -> {

-          if (log.isTraceEnabled()) {
-            log.trace("{} asking {} to bulk import {} files for {} tablets", fmtTid, server,
-                tabletFiles.values().stream().mapToInt(Map::size).sum(), tabletFiles.size());
-          }
+            if (log.isTraceEnabled()) {
+              log.trace("{} asking {} to bulk import {} files for {} tablets", fmtTid, server,
+                  tabletFiles.values().stream().mapToInt(Map::size).sum(), tabletFiles.size());
+            }

-          // On the server side tablets are processed serially with a write to the metadata table
-          // done for each tablet. Chunk the work up for a tablet server up so that it can be sent
-          // over
-          // multiple connections allowing it to run in parallel on the server side. This allows
-          // multiple threads on a single tserver to do metadata writes for this bulk import.
-          int neededConnections = Math.min(MAX_CONNECTIONS_PER_TSERVER, tabletFiles.size());
-          List<Map<TKeyExtent,Map<String,MapFileInfo>>> chunks = new ArrayList<>(neededConnections);
-          for (int i = 0; i < neededConnections; i++) {
-            chunks.add(new HashMap<>());
-          }
+            // On the server side tablets are processed serially with a write to the metadata table
+            // done for each tablet. Chunk the work up for a tablet server up so that it can be sent
+            // over multiple connections allowing it to run in parallel on the server side. This
+            // allows multiple threads on a single tserver to do metadata writes for this bulk
+            // import.
+            int neededConnections = Math.min(maxConnections, tabletFiles.size());
+            List<Map<TKeyExtent,Map<String,MapFileInfo>>> chunks =
+                new ArrayList<>(neededConnections);
+            for (int i = 0; i < neededConnections; i++) {
+              chunks.add(new HashMap<>());
+            }

-          int nextConnection = 0;
-          for (var entry : tabletFiles.entrySet()) {
-            chunks.get(nextConnection++ % chunks.size()).put(entry.getKey(), entry.getValue());
-          }
+            int nextConnection = 0;
+            for (var entry : tabletFiles.entrySet()) {
+              chunks.get(nextConnection++ % chunks.size()).put(entry.getKey(), entry.getValue());
+            }

-          for (var chunk : chunks) {
+            for (var chunk : chunks) {
+              try {
+                var client = ThriftUtil.getClient(ThriftClientTypes.TABLET_SERVER, server,
+                    manager.getContext(), timeInMillis);
+                // add client to list before calling send in case there is an exception, this makes
+                // sure its returned in the finally
+                clients.add(new Client(server, client));
+                client.send_loadFilesV2(TraceUtil.traceInfo(), manager.getContext().rpcCreds(), tid,
+                    bulkDir.toString(), chunk, setTime);
+              } catch (TException ex) {
+                log.debug("rpc failed server: {}, {}", server, fmtTid, ex);
+              }
+            }
+          });
+
+          long sendTime = sendTimer.elapsed(TimeUnit.MILLISECONDS);
+          sendTimer.restart();
+
+          int outdatedTservers = 0;
+
+          // wait for all the tservers to complete processing
+          for (var client : clients) {
             try {
-              var client = ThriftUtil.getClient(ThriftClientTypes.TABLET_SERVER, server,
-                  manager.getContext(), timeInMillis);
-              client.send_loadFiles(TraceUtil.traceInfo(), manager.getContext().rpcCreds(), tid,
-                  bulkDir.toString(), chunk, setTime);
-              clients.add(client);
+              client.service.recv_loadFilesV2();
             } catch (TException ex) {
-              log.debug("rpc failed server: " + server + ", " + fmtTid + " " + ex.getMessage(), ex);
-              // TODO return client
+              String additionalInfo = "";
+              if (ex instanceof TApplicationException) {
+                if (((TApplicationException) ex).getType()
+                    == TApplicationException.UNKNOWN_METHOD) {
+                  // A new RPC method was added in 2.1.4, a tserver running 2.1.3 or earlier will
+                  // not have this RPC. This should not kill the fate operation, it can wait until
+                  // all tablet servers are upgraded.
+                  outdatedTservers++;
+                  additionalInfo = " (tserver may be running older version)";
+                }
+              }
+              log.debug("rpc failed server{}: {}, {}", additionalInfo, client.server, fmtTid, ex);
             }
           }
-        });
-
-        long sendTime = sendTimer.elapsed(TimeUnit.MILLISECONDS);
-        sendTimer.restart();
-
-        // wait for all the tservers to complete processing
-        for (var client : clients) {
-          try {
-            client.recv_loadFiles();
-          } catch (TException ex) {
-            // TODO need to keep the server
-            String server = "???";
-            log.debug("rpc failed server: " + server + ", " + fmtTid + " " + ex.getMessage(), ex);
-          } finally {
-            // TODO this finally needs to be structured better, needs to cover entire send and
-            // receive code. Clients can fall throught the cracks w/ this.
-            ThriftUtil.returnClient(client, manager.getContext());
-          }
-        }

-        if (log.isDebugEnabled()) {
-          var recvTime = sendTimer.elapsed(TimeUnit.MILLISECONDS);
+          if (outdatedTservers > 0) {
+            log.info(
+                "{} can not proceed with bulk import because {} tablet servers are likely running "
+                    + "an older version. Please update tablet servers to same patch level as manager.",
+                fmtTid, outdatedTservers);
+          }

-          log.debug("{} sent {} messages to {} tablet servers, send time:{}ms recv time:{}ms",
-              fmtTid, clients.size(), loadQueue.size(), sendTime, recvTime);
-        }
+          if (log.isDebugEnabled()) {
+            var recvTime = sendTimer.elapsed(TimeUnit.MILLISECONDS);
+            int numTablets = loadQueue.values().stream().mapToInt(Map::size).sum();
+            log.debug(
+                "{} sent {} messages to {} tablet servers for {} tablets, send time:{}ms recv time:{}ms {}:{}",
+                fmtTid, clients.size(), loadQueue.size(), numTablets, sendTime, recvTime,
+                Property.MANAGER_BULK_MAX_CONNECTIONS.getKey(), maxConnections);
+          }

-        loadQueue.clear();
-        queuedDataSize = 0;
+          loadQueue.clear();
+          queuedDataSize = 0;

+        } finally {
+          for (var client : clients) {
+            ThriftUtil.returnClient(client.service, manager.getContext());
+          }
+        }
       }
     }

     private void addToQueue(HostAndPort server, KeyExtent extent,
         Map<String,MapFileInfo> thriftImports) {
       if (!thriftImports.isEmpty()) {
-        loadMsgs.increment(server, 1);
+        tabletsAdded++;

         Map<String,MapFileInfo> prev = loadQueue.computeIfAbsent(server, k -> new HashMap<>())
             .putIfAbsent(extent.toThrift(), thriftImports);

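Before the final hunk, a quick illustration of the core change to sendQueued above: each tablet server's per-tablet work is now split round-robin into at most MANAGER_BULK_MAX_CONNECTIONS chunks (previously the hardcoded MAX_CONNECTIONS_PER_TSERVER = 8), and each chunk is sent over its own connection so the server can write metadata for the bulk import in parallel. The minimal standalone sketch below reproduces just that chunking step; the class and method names are made up for the example, and generic map keys and values stand in for TKeyExtent and the per-file metadata.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ChunkingSketch {

  // Split one tablet server's per-tablet work into at most maxConnections chunks,
  // assigning tablets round-robin so each chunk can be sent over its own connection.
  static <K, V> List<Map<K, V>> chunk(Map<K, V> tabletFiles, int maxConnections) {
    int neededConnections = Math.min(maxConnections, tabletFiles.size());
    List<Map<K, V>> chunks = new ArrayList<>(neededConnections);
    for (int i = 0; i < neededConnections; i++) {
      chunks.add(new HashMap<>());
    }
    int next = 0;
    for (var entry : tabletFiles.entrySet()) {
      chunks.get(next++ % chunks.size()).put(entry.getKey(), entry.getValue());
    }
    return chunks;
  }

  public static void main(String[] args) {
    Map<String, String> work = new HashMap<>();
    for (int i = 0; i < 10; i++) {
      work.put("tablet-" + i, "files-" + i);
    }
    // With a cap of 8 connections, 10 tablets are spread over 8 chunks.
    chunk(work, 8).forEach(c -> System.out.println(c.keySet()));
  }
}

With 10 tablets and a cap of 8, two chunks receive two tablets each and the rest one, so up to eight loadFilesV2 calls can proceed concurrently against the same tablet server.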
@@ -317,16 +356,12 @@ long finish() {
     sendQueued(0);

     long sleepTime = 0;
-      if (loadMsgs.size() > 0) {
-        // Find which tablet server had the most load messages sent to it and sleep 13ms for each
-        // load message. Assuming it takes 13ms to process a single message. The tablet server will
-        // process these message in parallel, so assume it can process 16 in parallel. Must return a
-        // non-zero value when messages were sent or the calling code will think everything is done.
-        sleepTime = Math.max(1, (loadMsgs.max() * 13) / 16);
-
-        // TODO since a result from the tablet is being waited on now, could have the tserver report
-        // back success or not. If everything was a success, then could be done and avoid a
-        // subsequent metadata scan.
+      if (tabletsAdded > 0) {
+        // Waited for all the tablet servers to process everything so a long sleep is not needed.
+        // Even though this code waited, it does not know what succeeded on the tablet server side
+        // and it did not track if there were connection errors. Since success status is unknown
+        // must return a non-zero sleep to indicate another scan of the metadata table is needed.
+        sleepTime = 1;
       }

       if (locationLess > 0) {
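One behavioural note on the receive loop in sendQueued: recv_loadFilesV2 is a new RPC, so a tablet server still running an older release answers it with a Thrift UNKNOWN_METHOD application error, which the patch counts as an outdated tserver and logs instead of failing the FATE operation. The standalone sketch below shows only that type check in isolation; the class name OutdatedTserverCheck, the helper isUnknownMethod, and the hand-built exceptions are illustrative assumptions, not code from the patch.

import org.apache.thrift.TApplicationException;
import org.apache.thrift.TException;

public class OutdatedTserverCheck {

  // True when the remote side rejected the call because it does not know the RPC method,
  // which the patch treats as "this tablet server is probably on an older version"
  // rather than as a fatal error for the FATE operation.
  static boolean isUnknownMethod(TException ex) {
    return ex instanceof TApplicationException
        && ((TApplicationException) ex).getType() == TApplicationException.UNKNOWN_METHOD;
  }

  public static void main(String[] args) {
    TException fromOldServer =
        new TApplicationException(TApplicationException.UNKNOWN_METHOD, "unknown method");
    TException otherFailure =
        new TApplicationException(TApplicationException.INTERNAL_ERROR, "server error");
    System.out.println(isUnknownMethod(fromOldServer)); // true  -> counted as an outdated tserver
    System.out.println(isUnknownMethod(otherFailure));  // false -> ordinary RPC failure, just logged
  }
}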