//================================================================================
// Name        : jobdatastore.cpp
// Author      : Axel Auweter, Micha Mueller
// Contact     : info@dcdb.it
// Copyright   : Leibniz Supercomputing Centre
// Description : C++ API implementation for inserting and querying DCDB job data.
//================================================================================

//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2011-2019 Leibniz Supercomputing Centre
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
//================================================================================

/**
 * @file
 * @brief This file contains the actual implementation of the jobdatastore
 * interface. The interface itself forwards all functions to the internal
 * JobDataStoreImpl, where the real logic is implemented.
 */

35
#include <cinttypes>
36
37
38
39
40
41
42
43
44
45
46
47
#include <list>
#include <string>

#include "cassandra.h"

#include "dcdb/jobdatastore.h"
#include "jobdatastore_internal.h"
#include "dcdb/connection.h"
#include "dcdbglobals.h"

using namespace DCDB;

48
49
50
51
52
53
54
55
56
57
/**
 * @details
 * Since we want high-performance inserts, we prepare the insert CQL query in
 * advance and only bind it on the actual insert.
 */
void JobDataStoreImpl::prepareInsert(uint64_t ttl) {
  CassError rc = CASS_OK;
  CassFuture* future = NULL;
  const char* query;

58
  /* Free the old prepared if necessary. */
59
60
61
62
63
64
  if (preparedInsert) {
    cass_prepared_free(preparedInsert);
  }

  char *queryBuf = NULL;
  if (ttl == 0) {
65
66
    query = "INSERT INTO " JD_KEYSPACE_NAME "." CF_JOBDATA
        " (jid, uid, start_ts, end_ts, nodes) VALUES (?, ?, ?, ?, ?);";
67
68
69
  }
  else {
    queryBuf = (char*)malloc(256);
70
71
72
    snprintf(queryBuf, 256, "INSERT INTO " JD_KEYSPACE_NAME "." CF_JOBDATA
        " (jid, uid, start_ts, end_ts, nodes) VALUES (?, ?, ?, ?, ?) "
        "USING TTL %" PRIu64 " ;", ttl);
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
    query = queryBuf;
  }

  future = cass_session_prepare(session, query);
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
  } else {
    preparedInsert = cass_future_get_prepared(future);
  }

  cass_future_free(future);
  if (queryBuf) {
    free(queryBuf);
  }
}

92
93
/**
 * @details
94
 * Extract all data from the JobData object and push it into the data store.
95
 */
96
JDError JobDataStoreImpl::insertJob(JobData& jdata) {
97
98
  /* Check if the input for the primary key is valid and reasonable */
  if (jdata.startTime.getRaw() == 0) {
99
    return JD_BADPARAMS;
100
101
102
103
  }

  JDError error = JD_UNKNOWNERROR;

104
105
106
107
108
109
110
  /* Insert into Cassandra */
  CassError rc = CASS_OK;
  CassStatement* statement = NULL;
  CassFuture *future = NULL;

  statement = cass_prepared_bind(preparedInsert);

111
112
113
114
  cass_statement_bind_string_by_name(statement, "jid", jdata.jobId.c_str());
  cass_statement_bind_string_by_name(statement, "uid", jdata.userId.c_str());
  cass_statement_bind_int64_by_name(statement, "start_ts", jdata.startTime.getRaw());
  cass_statement_bind_int64_by_name(statement, "end_ts", jdata.endTime.getRaw());
115

116
  /* Copy the string node list to a varchar set */
117
118
119
120
121
122
123
124
  CassCollection* set = cass_collection_new(CASS_COLLECTION_TYPE_SET,
                                            jdata.nodes.size());
  for (auto& s : jdata.nodes) {
    cass_collection_append_string(set, s.c_str());
  }

  cass_statement_bind_collection_by_name(statement, "nodes", set);

125
  /* All parameters bound. Now execute the statement asynchronously */
126
  future = cass_session_execute(session, statement);
127
128
129
130
131

  /* Clean up in the meantime */
  cass_collection_free(set);

  /* Wait for the statement to finish */
132
133
134
135
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
136
137
138
139
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;
140
141
142
143
  }

  cass_future_free(future);
  cass_statement_free(statement);
144
  return error;
145
146
147
148
}

/**
 * @details
149
150
151
152
153
154
155
 * Update the job with matching JobId and StartTs in the data store with the
 * values provided by the given JobData object. If no such job exists in the
 * data store yet, it is inserted.
 * The JobData object is expected to be complete. Partial
 * updates of only selected fields are not supported. Instead, one has to
 * retrieve the other JobData information via getJobById() or
 * getJobByPrimaryKey() first and complete its JobData object for the update.
156
 */
157
158
159
160
161
162
JDError JobDataStoreImpl::updateJob(JobData& jdata) {
  /* Check if the input for the primary key is valid and reasonable */
  if (jdata.startTime.getRaw() == 0) {
    return JD_BADPARAMS;
  }

163
164
  JDError error = JD_UNKNOWNERROR;

165
  /* Update entry in Cassandra (actually upserts) */
166
  CassError rc = CASS_OK;
167
168
169
170
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "UPDATE " JD_KEYSPACE_NAME "." CF_JOBDATA
      " SET uid = ?, end_ts = ?, nodes = ? WHERE jid = ? AND start_ts = ? ;";
171

172
  statement = cass_statement_new(query, 5);
173

174
175
  cass_statement_bind_string(statement, 3, jdata.jobId.c_str());
  cass_statement_bind_string(statement, 0, jdata.userId.c_str());
176
177
  cass_statement_bind_int64(statement, 4, jdata.startTime.getRaw());
  cass_statement_bind_int64(statement, 1, jdata.endTime.getRaw());
178

179
  /* Copy the string node list to a varchar set */
180
  CassCollection* set = cass_collection_new(CASS_COLLECTION_TYPE_SET, jdata.nodes.size());
181
182
183
  for (auto& s : jdata.nodes) {
    cass_collection_append_string(set, s.c_str());
  }
184

185
  cass_statement_bind_collection(statement, 2, set);
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205

  /* All parameters bound. Now execute the statement asynchronously */
  future = cass_session_execute(session, statement);

  /* Clean up in the meantime */
  cass_collection_free(set);

  /* Wait for the statement to finish */
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;
  }

  cass_future_free(future);
  cass_statement_free(statement);
206

207
  return error;
208
209
210
211
}

/**
 * @details
212
213
214
 * Update the job with matching JobId and StartTs in the data store with the
 * provided end time. If no such job exists in the data store yet, it is
 * inserted.
215
 */
216
JDError JobDataStoreImpl::updateEndtime(JobId jobId, TimeStamp startTs, TimeStamp endTime) {
217
218
  /* Check if the input for the primary key is valid and reasonable */
  if (startTs.getRaw() == 0) {
219
    return JD_BADPARAMS;
220
221
222
223
  }

  JDError error = JD_UNKNOWNERROR;

224
  /* Update entry in Cassandra (actually upserts) */
225
226
227
228
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "UPDATE " JD_KEYSPACE_NAME "." CF_JOBDATA
Alessio Netti's avatar
Alessio Netti committed
229
      " SET end_ts = ? WHERE jid = ? AND start_ts = ? ;";
230

231
  statement = cass_statement_new(query, 3);
232

233
  cass_statement_bind_string(statement, 1, jobId.c_str());
234
235
  cass_statement_bind_int64(statement, 2, startTs.getRaw());
  cass_statement_bind_int64(statement, 0, endTime.getRaw());
236

237
238
239
240
241
242
243
244
245
246
247
248
  /* All parameters bound. Now execute the statement asynchronously */
  future = cass_session_execute(session, statement);

  /* Wait for the statement to finish */
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;
249
250
  }

251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
  cass_future_free(future);
  cass_statement_free(statement);

  return error;
}

/**
 * @brief Remove a job from the data store.
 *
 * @param jid      Id of the job to delete.
 * @param startTs  Start timestamp of the job to delete.
 * @return JD_OK if the statement succeeded, JD_UNKNOWNERROR otherwise.
 *
 * @details
 * Delete the entry with matching JobId and start TimeStamp from the data store.
 */
JDError JobDataStoreImpl::deleteJob(JobId jid, TimeStamp startTs) {
  /* Remove entry from Cassandra */
  const char* cql = "DELETE FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
      " WHERE jid = ? AND start_ts = ?;";
  CassStatement* stmt = cass_statement_new(cql, 2);

  cass_statement_bind_string(stmt, 0, jid.c_str());
  cass_statement_bind_int64(stmt, 1, startTs.getRaw());

  /* Execute and block until the statement has finished */
  CassFuture* pending = cass_session_execute(session, stmt);
  cass_future_wait(pending);

  JDError result = JD_OK;
  if (cass_future_error_code(pending) != CASS_OK) {
    connection->printError(pending);
    result = JD_UNKNOWNERROR;
  }

  cass_future_free(pending);
  cass_statement_free(stmt);

  return result;
}

/**
 * @details
298
299
 * Find the entry in the data store with matching JobId and start_ts and store
 * the corresponding values in the JobData object.
300
 */
301
302
JDError JobDataStoreImpl::getJobByPrimaryKey(JobData& job, JobId jid,
                                             TimeStamp startTs) {
303
304
  JDError error = JD_UNKNOWNERROR;

305
  /* Select entry from Cassandra */
306
307
308
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
309
310
  const char* query = "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
      " WHERE jid = ? AND start_ts = ?;";
311

312
  statement = cass_statement_new(query, 2);
313

314
  cass_statement_bind_string(statement, 0, jid.c_str());
315
  cass_statement_bind_int64(statement, 1, startTs.getRaw());
316
317
318
319
320
321
322
323
324
325
326
327
328

  /* All parameters bound. Now execute the statement asynchronously */
  future = cass_session_execute(session, statement);

  /* Wait for the statement to finish */
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;
329
330
331
332
333
334
335
336
337
338
339

    const CassResult* cresult = cass_future_get_result(future);
    size_t rowCnt = cass_result_row_count(cresult);

    /* Check if the returned data is reasonable */
    if (rowCnt == 0) {
      error = JD_JOBKEYNOTFOUND;
    } else {
      /* Retrieve data from result */
      const CassRow* row = cass_result_first_row(cresult);

340
341
342
      cass_int64_t startTs, endTs;
      const char *jobId, *userId;
      size_t jobId_len, userId_len;
343
      /* jid and start_ts are always set. Other values should be checked */
344
345
346
347
348
      cass_value_get_string(cass_row_get_column_by_name(row, "jid"), &jobId, &jobId_len);
      cass_value_get_int64(cass_row_get_column_by_name(row, "start_ts"), &startTs);
      if (cass_value_get_string(cass_row_get_column_by_name(row, "uid"), &userId, &userId_len) != CASS_OK) {
        userId = "";
        userId_len = 0;
349
350
        error = JD_PARSINGERROR;
      }
351
      if (cass_value_get_int64(cass_row_get_column_by_name(row, "end_ts"), &endTs) != CASS_OK) {
352
353
354
355
356
        endTs = 0;
        error = JD_PARSINGERROR;
      }

      /* Copy the data in the JobData object */
357
358
      job.jobId = (JobId) std::string(jobId, jobId_len);
      job.userId = (UserId) std::string(userId, userId_len);
359
360
361
362
363
364
365
366
367
368
369
      job.startTime = (uint64_t) startTs;
      job.endTime = (uint64_t) endTs;

      /* Do not forget about the nodes... */
      const char* nodeStr;
      size_t nodeStr_len;

      const CassValue* set = cass_row_get_column_by_name(row, "nodes");
      CassIterator* setIt = cass_iterator_from_collection(set);

      while (cass_iterator_next(setIt)) {
370
        cass_value_get_string(cass_iterator_get_value(setIt), &nodeStr, &nodeStr_len);
371
372
373
374
375
376
377
        job.nodes.emplace_back(nodeStr, nodeStr_len);
      }

      cass_iterator_free(setIt);
    }

    cass_result_free(cresult);
378
379
380
381
382
383
  }

  cass_future_free(future);
  cass_statement_free(statement);

  return error;
384
385
386
387
}

/**
 * @details
388
389
 * Find the entry in the data store with matching JobId and highest start_ts
 * value (= most recent job) and store the
390
 * corresponding values in the JobData object.
391
 */
392
JDError JobDataStoreImpl::getJobById(JobData& job, JobId jid) {
393
394
395
396
397
398
399
  JDError error = JD_UNKNOWNERROR;

  /* Select entry from Cassandra */
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
400
      " WHERE jid = ? ORDER BY start_ts DESC LIMIT 1;";
401
402
403

  statement = cass_statement_new(query, 1);

404
  cass_statement_bind_string(statement, 0, jid.c_str());
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427

  /* All parameters bound. Now execute the statement asynchronously */
  future = cass_session_execute(session, statement);

  /* Wait for the statement to finish */
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;

    const CassResult* cresult = cass_future_get_result(future);
    size_t rowCnt = cass_result_row_count(cresult);

    /* Check if the returned data is reasonable */
    if (rowCnt == 0) {
      error = JD_JOBIDNOTFOUND;
    } else {
      /* Retrieve data from result */
      const CassRow* row = cass_result_first_row(cresult);
428
429
430
431
      
      cass_int64_t startTs, endTs;
      const char *jobId, *userId;
      size_t jobId_len, userId_len;
432

433
      /* jid and start_ts are always set. Other values should be checked */
434
435
436
437
438
      cass_value_get_string(cass_row_get_column_by_name(row, "jid"), &jobId, &jobId_len);
      cass_value_get_int64(cass_row_get_column_by_name(row, "start_ts"), &startTs);
      if (cass_value_get_string(cass_row_get_column_by_name(row, "uid"), &userId, &userId_len) != CASS_OK) {
        userId = "";
        userId_len = 0;
439
440
        error = JD_PARSINGERROR;
      }
441
      if (cass_value_get_int64(cass_row_get_column_by_name(row, "end_ts"), &endTs) != CASS_OK) {
442
443
444
445
446
        endTs = 0;
        error = JD_PARSINGERROR;
      }

      /* Copy the data in the JobData object */
447
448
      job.jobId = (JobId) std::string(jobId, jobId_len);
      job.userId = (UserId) std::string(userId, userId_len);
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
      job.startTime = (uint64_t) startTs;
      job.endTime = (uint64_t) endTs;

      /* Do not forget about the nodes... */
      const char* nodeStr;
      size_t nodeStr_len;

      const CassValue* set = cass_row_get_column_by_name(row, "nodes");
      CassIterator* setIt = cass_iterator_from_collection(set);

      while (cass_iterator_next(setIt)) {
        cass_value_get_string(cass_iterator_get_value(setIt),
                              &nodeStr, &nodeStr_len);
        job.nodes.emplace_back(nodeStr, nodeStr_len);
      }

      cass_iterator_free(setIt);
    }

    cass_result_free(cresult);
  }

  cass_future_free(future);
  cass_statement_free(statement);

  return error;
475
476
477
478
}

/**
 * @details
479
480
 * Find all entries in the data store whose start_ts AND end_ts lay within
 * the specified interval. Store the found entries in the JobData list.
481
 */
482
483
484
JDError JobDataStoreImpl::getJobsInIntervalExcl(std::list<JobData>& jobs,
                                                TimeStamp intervalStart,
                                                TimeStamp intervalEnd) {
485
486
  /* Check if the input is valid and reasonable */
  if (intervalEnd.getRaw() == 0) {
487
    return JD_BADPARAMS;
488
489
  }
  if (intervalStart >= intervalEnd) {
490
    return JD_BADPARAMS;
491
492
493
494
495
496
497
498
499
  }

  JDError error = JD_UNKNOWNERROR;

  /* Select entries from Cassandra */
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
500
      " WHERE start_ts >= ? AND end_ts <= ? ALLOW FILTERING;";
501
502

  statement = cass_statement_new(query, 2);
503
  cass_statement_set_paging_size(statement, PAGING_SIZE);
504
505
506
507

  cass_statement_bind_int64(statement, 0, intervalStart.getRaw());
  cass_statement_bind_int64(statement, 1, intervalEnd.getRaw());

508
509
510
511
  bool morePages = false;
  do {
      /* All parameters bound. Now execute the statement asynchronously */
      future = cass_session_execute(session, statement);
512
    
513
514
      /* Wait for the statement to finish */
      cass_future_wait(future);
515
    
516
517
518
519
520
521
522
523
524
525
526
527
528
      rc = cass_future_error_code(future);
      if (rc != CASS_OK) {
        connection->printError(future);
        error = JD_UNKNOWNERROR;
        morePages = false;
      } else {
        error = JD_OK;
    
        /* Retrieve data from result */
        const CassResult* cresult = cass_future_get_result(future);
        CassIterator* rowIt = cass_iterator_from_result(cresult);
        
        error = parseJobs(rowIt, jobs, NULL);
529

530
531
532
533
534
535
        if((morePages = cass_result_has_more_pages(cresult)))
            cass_statement_set_paging_state(statement, cresult);
        
        cass_iterator_free(rowIt);
        cass_result_free(cresult);
      }
536

537
538
539
540
541
542
      cass_future_free(future);
      
  }
  while(morePages);
  
  cass_statement_free(statement);
543
544
  return error;
}
545

546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
/**
 * @brief Retrieve all jobs that were running during a time interval.
 *
 * @param jobs           List to which the found jobs are appended.
 * @param intervalStart  Start of the queried interval.
 * @param intervalEnd    End of the queried interval.
 * @return JD_BADPARAMS if intervalEnd is 0 or intervalStart >= intervalEnd,
 *         JD_UNKNOWNERROR if any query page failed, otherwise JD_OK or the
 *         first non-OK code reported by parseJobs().
 *
 * @details
 * Find all entries in the data store corresponding to jobs that were running
 * in the queried time interval, i.e., their start time is greater than 0 and
 * not greater than intervalEnd, and their end time is either 0 or not less
 * than intervalStart.
 * The two end time conditions are queried with two separate SELECTs. Both
 * share one set of job ids which is handed to parseJobs() for deduplication.
 * An error of the first SELECT is preserved even if the second one succeeds.
 */
JDError JobDataStoreImpl::getJobsInIntervalRunning(std::list<JobData>& jobs,
                                                TimeStamp intervalStart,
                                                TimeStamp intervalEnd) {
    /* Check if the input is valid and reasonable */
    if (intervalEnd.getRaw() == 0) {
        return JD_BADPARAMS;
    }
    if (intervalStart >= intervalEnd) {
        return JD_BADPARAMS;
    }

    /* First SELECT: started jobs whose end_ts is 0.
     * Second SELECT: started jobs whose end_ts is at or after intervalStart. */
    const char* const queries[2] = {
        "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
        " WHERE start_ts <= ? AND start_ts > ? AND end_ts = ? ALLOW FILTERING;",
        "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
        " WHERE start_ts <= ? AND start_ts > ? AND end_ts >= ? ALLOW FILTERING;"
    };
    /* Value bound to the end_ts placeholder of the respective query */
    const cass_int64_t endTsBounds[2] = {
        static_cast<cass_int64_t>(0),
        static_cast<cass_int64_t>(intervalStart.getRaw())
    };

    std::unordered_set<JobId> jobIds;

    /* The error code only ever degrades: an error seen in an earlier page or
     * in the first SELECT must not be overwritten by a later success. */
    JDError error = JD_OK;

    for (int q = 0; q < 2; ++q) {
        CassStatement* statement = cass_statement_new(queries[q], 3);
        cass_statement_set_paging_size(statement, PAGING_SIZE);

        cass_statement_bind_int64(statement, 0, intervalEnd.getRaw());
        cass_statement_bind_int64(statement, 1, static_cast<cass_int64_t>(0));
        cass_statement_bind_int64(statement, 2, endTsBounds[q]);

        bool morePages = false;
        do {
            morePages = false;

            /* Execute the statement for the current page and wait for it */
            CassFuture* future = cass_session_execute(session, statement);
            cass_future_wait(future);

            if (cass_future_error_code(future) != CASS_OK) {
                connection->printError(future);
                error = JD_UNKNOWNERROR;
            } else {
                /* Retrieve data from result */
                const CassResult* cresult = cass_future_get_result(future);
                CassIterator* rowIt = cass_iterator_from_result(cresult);

                const JDError pageError = parseJobs(rowIt, jobs, &jobIds);
                if (error == JD_OK) {
                    error = pageError;
                }

                /* Remember where to continue if the result is incomplete */
                if (cass_result_has_more_pages(cresult)) {
                    morePages = true;
                    cass_statement_set_paging_state(statement, cresult);
                }

                cass_iterator_free(rowIt);
                cass_result_free(cresult);
            }

            cass_future_free(future);
        } while (morePages);

        cass_statement_free(statement);
    }

    return error;
}

/**
 * @details
668
669
670
671
672
673
 * Find all entries in the data store whose start_ts OR end_ts lays within
 * the specified interval. Store the found entries in the JobData list.
 * Cassandra only supports AND conditions in its query language. Therefore
 * we cannot SELECT directly the required jobs. Instead we have to do two
 * selects and manually deduplicate the results.
 * TODO Doing two request successively may tangle up error codes.
674
 */
675
676
677
JDError JobDataStoreImpl::getJobsInIntervalIncl(std::list<JobData>& jobs,
                                                TimeStamp intervalStart,
                                                TimeStamp intervalEnd) {
678
679
  /* Check if the input is valid and reasonable */
  if (intervalEnd.getRaw() == 0) {
680
    return JD_BADPARAMS;
681
682
  }
  if (intervalStart >= intervalEnd) {
683
    return JD_BADPARAMS;
684
685
686
687
688
689
  }

  JDError error = JD_UNKNOWNERROR;

  /* +++ First SELECT +++ */
  /* Select entries from Cassandra where start_ts lays within the interval */
690
  std::unordered_set<JobId> jobIds;
691
692
693
694
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
695
      " WHERE start_ts >= ? AND start_ts <= ? ALLOW FILTERING;";
696
697

  statement = cass_statement_new(query, 2);
698
  cass_statement_set_paging_size(statement, PAGING_SIZE);
699
700
701
702

  cass_statement_bind_int64(statement, 0, intervalStart.getRaw());
  cass_statement_bind_int64(statement, 1, intervalEnd.getRaw());

703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
  bool morePages = false;
  do {
      /* All parameters bound. Now execute the statement asynchronously */
      future = cass_session_execute(session, statement);
    
      /* Wait for the statement to finish */
      cass_future_wait(future);
    
      rc = cass_future_error_code(future);
      if (rc != CASS_OK) {
        connection->printError(future);
        error = JD_UNKNOWNERROR;
        morePages = false;
      } else {
        error = JD_OK;
    
        /* Retrieve data from result */
        const CassResult* cresult = cass_future_get_result(future);
        CassIterator* rowIt = cass_iterator_from_result(cresult);
    
        error = parseJobs(rowIt, jobs, &jobIds);
724

725
726
727
728
729
730
        if((morePages = cass_result_has_more_pages(cresult)))
            cass_statement_set_paging_state(statement, cresult);
    
        cass_iterator_free(rowIt);
        cass_result_free(cresult);
      }
731

732
      cass_future_free(future);
733
734

  }
735
736
  while(morePages);
  
737
738
739
740
741
  cass_statement_free(statement);

  /* +++ Second SELECT +++ */
  /* Select entries from Cassandra where end_ts lays within the interval */
  query = "SELECT * FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
742
      " WHERE end_ts >= ? AND end_ts <= ? ALLOW FILTERING;";
743
744

  statement = cass_statement_new(query, 2);
745
  cass_statement_set_paging_size(statement, PAGING_SIZE);
746
747
748
749

  cass_statement_bind_int64(statement, 0, intervalStart.getRaw());
  cass_statement_bind_int64(statement, 1, intervalEnd.getRaw());

750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
  morePages = false;
  do {
      /* All parameters bound. Now execute the statement asynchronously */
      future = cass_session_execute(session, statement);
    
      /* Wait for the statement to finish */
      cass_future_wait(future);
    
      rc = cass_future_error_code(future);
      if (rc != CASS_OK) {
        connection->printError(future);
        error = JD_UNKNOWNERROR;
        morePages = false;
      } else {
        /* Retrieve data from result */
        const CassResult* cresult = cass_future_get_result(future);
        CassIterator* rowIt = cass_iterator_from_result(cresult);
    
        error = parseJobs(rowIt, jobs, &jobIds);
769

770
771
772
773
774
775
        if((morePages = cass_result_has_more_pages(cresult)))
            cass_statement_set_paging_state(statement, cresult);
    
        cass_iterator_free(rowIt);
        cass_result_free(cresult);
      }
776

777
      cass_future_free(future);
778
779

  }
780
781
  while(morePages);
  
782
783
784
  cass_statement_free(statement);

  return error;
785
786
}

787
/**
 * @brief Convert the rows of a result iterator into JobData objects.
 *
 * @param rowIt   Iterator over the rows of a query result. It is advanced
 *                until exhausted but not freed here.
 * @param jobs    List to which the parsed jobs are appended.
 * @param jobIds  Optional set of job ids used for deduplication. If not
 *                nullptr, a row is only stored when its job id could be newly
 *                inserted into the set.
 * @return JD_OK if every row could be read completely, JD_PARSINGERROR if at
 *         least one column of at least one row could not be read.
 *
 * @details
 * Rows whose job id cannot be read are skipped. For the other columns a
 * failed read results in an empty/zero value for the affected field.
 */
JDError JobDataStoreImpl::parseJobs(CassIterator* rowIt, std::list<JobData>& jobs, std::unordered_set<JobId>* jobIds) {
    JDError error = JD_OK;

    JobData job;
    while (cass_iterator_next(rowIt)) {
        const CassRow* row = cass_iterator_get_row(rowIt);

        /* Locals carry safe defaults so that a failed read never leaves an
         * uninitialised pointer or value behind. */
        const char* jobId = "";
        size_t jobId_len = 0;
        const char* userId = "";
        size_t userId_len = 0;
        cass_int64_t startTs = 0;
        cass_int64_t endTs = 0;

        /* Without a job id the row cannot be identified; skip it. */
        if (cass_value_get_string(cass_row_get_column_by_name(row, "jid"), &jobId, &jobId_len) != CASS_OK) {
            error = JD_PARSINGERROR;
            continue;
        }
        if (cass_value_get_string(cass_row_get_column_by_name(row, "uid"), &userId, &userId_len) != CASS_OK) {
            userId = "";
            userId_len = 0;
            error = JD_PARSINGERROR;
        }
        if (cass_value_get_int64(cass_row_get_column_by_name(row, "start_ts"), &startTs) != CASS_OK) {
            startTs = 0;
            error = JD_PARSINGERROR;
        }
        if (cass_value_get_int64(cass_row_get_column_by_name(row, "end_ts"), &endTs) != CASS_OK) {
            endTs = 0;
            error = JD_PARSINGERROR;
        }

        /* Copy the data into job object */
        job.jobId = static_cast<JobId>(std::string(jobId, jobId_len));

        /* Set-based deduplication */
        if (jobIds == nullptr || jobIds->insert(job.jobId).second) {
            job.userId = static_cast<UserId>(std::string(userId, userId_len));
            job.startTime = static_cast<uint64_t>(startTs);
            job.endTime = static_cast<uint64_t>(endTs);

            /* Do not forget about the nodes... */
            const CassValue* set = cass_row_get_column_by_name(row, "nodes");
            CassIterator* setIt = cass_iterator_from_collection(set);

            /* The driver returns NULL if the column does not hold a
             * collection (e.g. a null column); then there are no nodes. */
            if (setIt != nullptr) {
                while (cass_iterator_next(setIt)) {
                    const char* nodeStr = nullptr;
                    size_t nodeStr_len = 0;
                    if (cass_value_get_string(cass_iterator_get_value(setIt), &nodeStr, &nodeStr_len) == CASS_OK) {
                        job.nodes.emplace_back(nodeStr, nodeStr_len);
                    } else {
                        error = JD_PARSINGERROR;
                    }
                }
                cass_iterator_free(setIt);
            }

            /* push_back stores a copy of job; afterwards the node list of the
             * reused job object is emptied for the next row. */
            jobs.push_back(job);
            job.nodes.clear();
        }
    }
    return error;
}

841
842
/**
 * @details
843
844
 * Find the entry in the data store with matching JobId and highest start_ts
 * value (= most recent job) and store the
845
 * corresponding nodes in the NodeList.
846
 */
847
JDError JobDataStoreImpl::getNodeList(NodeList& nodes, JobId jid, TimeStamp startTs) {
848
849
850
851
852
853
854
  JDError error = JD_UNKNOWNERROR;

  /* Select entry from Cassandra */
  CassError rc = CASS_OK;
  CassStatement* statement = nullptr;
  CassFuture* future = nullptr;
  const char* query = "SELECT nodes FROM " JD_KEYSPACE_NAME "." CF_JOBDATA
855
      " WHERE jid = ? ORDER BY start_ts LIMIT 1;";
856
857
858

  statement = cass_statement_new(query, 1);

859
  cass_statement_bind_string(statement, 0, jid.c_str());
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891

  /* All parameters bound. Now execute the statement asynchronously */
  future = cass_session_execute(session, statement);

  /* Wait for the statement to finish */
  cass_future_wait(future);

  rc = cass_future_error_code(future);
  if (rc != CASS_OK) {
    connection->printError(future);
    error = JD_UNKNOWNERROR;
  } else {
    error = JD_OK;

    const CassResult* cresult = cass_future_get_result(future);
    size_t rowCnt = cass_result_row_count(cresult);

    /* Check if the returned data is reasonable */
    if (rowCnt == 0) {
      error = JD_JOBIDNOTFOUND;
    } else {
      /* Retrieve data from result */
      const CassRow* row = cass_result_first_row(cresult);

      /* Copy the nodes in the NodeList */
      const char* nodeStr;
      size_t nodeStr_len;

      const CassValue* set = cass_row_get_column_by_name(row, "nodes");
      CassIterator* setIt = cass_iterator_from_collection(set);

      while (cass_iterator_next(setIt)) {
892
        cass_value_get_string(cass_iterator_get_value(setIt), &nodeStr, &nodeStr_len);
893
894
895
896
897
898
899
900
901
902
903
904
905
        nodes.emplace_back(nodeStr, nodeStr_len);
      }

      cass_iterator_free(setIt);
    }

    cass_result_free(cresult);
  }

  cass_future_free(future);
  cass_statement_free(statement);

  return error;
906
907
908
909
910
911
912
913
914
915
916
}

/**
 * @details
 * This constructor sets the internal connection variable to
 * the externally provided Connection object and also
 * retrieves the CassSession pointer of the connection.
 * Afterwards the prepared insert handle is reset and prepareInsert() is
 * called once with an argument of 0.
 *
 * @param conn  Connection to use. It is dereferenced unconditionally and
 *              therefore must not be null.
 */
JobDataStoreImpl::JobDataStoreImpl(Connection* conn) {
  connection = conn;
  session = connection->getSessionHandle();

  /* Start without a prepared statement so prepareInsert() sees a clean state */
  preparedInsert = nullptr;
  prepareInsert(0);
}

/**
 * @details
 * The destructor just resets the internal pointers. Deletion of the pointers
 * (except preparedInsert) is not our responsibility. The prepared insert
 * statement, if present, is released via cass_prepared_free().
 */
JobDataStoreImpl::~JobDataStoreImpl() {
  connection = nullptr;
  session = nullptr;

  if (preparedInsert) {
    cass_prepared_free(preparedInsert);
    /* Do not keep a dangling handle around */
    preparedInsert = nullptr;
  }
}

/* ########################################################################## */

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
942
943
JDError JobDataStore::insertJob(JobData& jdata) {
  return impl->insertJob(jdata);
944
945
946
947
948
949
950
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
951
952
953
954
955
956
957
958
959
960
961
962
JDError JobDataStore::updateJob(JobData& jdata) {
  return impl->updateJob(jdata);
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 *
 * @param jobId    Job id, passed on unchanged.
 * @param startTs  Start timestamp, passed on unchanged.
 * @param endTime  End timestamp, passed on unchanged.
 * @return The JDError reported by JobDataStoreImpl::updateEndtime().
 */
JDError JobDataStore::updateEndtime(JobId jobId, TimeStamp startTs,
                                    TimeStamp endTime) {
  return impl->updateEndtime(jobId, startTs, endTime);
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
970
971
JDError JobDataStore::deleteJob(JobId jid, TimeStamp startTs) {
  return impl->deleteJob(jid, startTs);
972
973
974
975
976
977
978
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
979
980
981
JDError JobDataStore::getJobByPrimaryKey(JobData& job, JobId jid,
                                         TimeStamp startTs) {
  return impl->getJobByPrimaryKey(job, jid, startTs);
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
}

/**
 * @details
 * Thin wrapper: the call is handed over to JobDataStoreImpl::getJobById()
 * and its result is returned unchanged.
 *
 * @param job  Job data object, passed on by reference.
 * @param jid  Job id, passed on unchanged.
 * @return The JDError reported by the implementation class.
 */
JDError JobDataStore::getJobById(JobData& job, JobId jid) {
  const JDError status = impl->getJobById(job, jid);
  return status;
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
998
999
1000
1001
JDError JobDataStore::getJobsInIntervalExcl(std::list<JobData>& jobs,
                                            TimeStamp intervalStart,
                                            TimeStamp intervalEnd) {
  return impl->getJobsInIntervalExcl(jobs, intervalStart, intervalEnd);
1002
1003
}

1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
/**
 * @details
 * Thin wrapper: the call is handed over to
 * JobDataStoreImpl::getJobsInIntervalRunning() and its result is returned
 * unchanged.
 *
 * @param jobs           List of job data objects, passed on by reference.
 * @param intervalStart  Start of the interval, passed on unchanged.
 * @param intervalEnd    End of the interval, passed on unchanged.
 * @return The JDError reported by the implementation class.
 */
JDError JobDataStore::getJobsInIntervalRunning(std::list<JobData>& jobs,
                                               TimeStamp intervalStart,
                                               TimeStamp intervalEnd) {
  const JDError status =
      impl->getJobsInIntervalRunning(jobs, intervalStart, intervalEnd);
  return status;
}

1015
1016
1017
1018
1019
/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
1020
1021
1022
1023
JDError JobDataStore::getJobsInIntervalIncl(std::list<JobData>& jobs,
                                            TimeStamp intervalStart,
                                            TimeStamp intervalEnd) {
  return impl->getJobsInIntervalIncl(jobs, intervalStart, intervalEnd);
1024
1025
1026
1027
1028
1029
1030
}

/**
 * @details
 * Instead of doing the actual work, this function simply forwards to the
 * corresponding function of the JobDataStoreImpl class.
 */
1031
1032
1033
JDError JobDataStore::getNodeList(NodeList& nodes, JobId jid,
                                  TimeStamp startTs) {
  return impl->getNodeList(nodes, jid, startTs);
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
}

/**
 * @details
 * Creates the JobDataStoreImpl object on the heap which carries the real
 * functionality of this class and keeps a pointer to it.
 *
 * @param conn  Connection object handed on to the JobDataStoreImpl
 *              constructor.
 */
JobDataStore::JobDataStore(Connection* conn)
    : impl(new JobDataStoreImpl(conn)) {
}

/**
 * @details
 * The JobDataStore destructor deallocates the JobDataStoreImpl object
 * held in impl. No other object is released here.
 */
JobDataStore::~JobDataStore() {
  /* Clean up... deleting a null pointer is a no-op, so no check is needed */
  delete impl;
}