Commit 5e727ec0 authored by Alessio Netti's avatar Alessio Netti
Browse files

Merge remote-tracking branch 'remotes/origin/slurmJobdata'

# Conflicts:
#	.gitignore
parents 25a4ae03 760d5bf6
......@@ -18,6 +18,7 @@ OBJS = src/connection.o \
src/virtualsensor.o \
src/c_api.o \
src/sensoroperations.o \
src/jobdatastore.o \
src/version.o
# List of public header files necessary to use this library
......
//================================================================================
// Name : c_api.h
// Author : Axel Auweter
// Author : Axel Auweter, Micha Mueller
// Copyright : Leibniz Supercomputing Centre
// Description : C Application Programming Interface for libdcdb
//================================================================================
//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2011-2016 Leibniz Supercomputing Centre
// Copyright (C) 2011-2018 Leibniz Supercomputing Centre
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
......@@ -33,6 +33,9 @@
#include <stdint.h>
#include <time.h>
#include "dcdb/jobdatastore.h"
#include "dcdb/connection.h"
#ifndef DCDB_C_API_H
#define DCDB_C_API_H
......@@ -50,7 +53,8 @@ typedef enum {
DCDB_C_EMPTYSET, /**< The query into the database resulted in an empty set. */
DCDB_C_NOTINTEGRABLE, /**< One of the QuerySum() functions was called on a sensor that is not marked as integrable. */
DCDB_C_NOSENSOR, /**< The caller did not specify a sensor to be queried. */
DCDB_C_UNKNOWN /**< An unknown error occured. */
DCDB_C_BADPARAMS, /**< The provided function parameters are malformed or incomplete */
DCDB_C_UNKNOWN /**< An unknown error occurred. */
} DCDB_C_RESULT;
/**
......@@ -156,6 +160,130 @@ DCDB_C_RESULT dcdbQuerySumMultipleThreaded(
unsigned int numThreads
);
/*****************************************************************************/
/* Following are C-API functions to insert job information. */
/* Intended to be called from python-script. */
/* */
/* Expected order: */
/* 1. Create a database-connection via connectToDatabase(). */
/* 2. Create a JobDataStore object with the previous connection. */
/* 3. Insert jobs via insertJobStart()/updateJobEnd() and that JobDataStore. */
/* 4. If finished, destroy JobDataStore and Connection object with their */
/* corresponding destruct/disconnect methods. */
/*****************************************************************************/
/**
* @brief Construct a new DCDB::Connection object and connect it to database.
*
* @param hostname Hostname of database node
* @param port TCP port to use for connecting to the database node
* (for Cassandra, this is usually 9042).
* @return Pointer to the new Connection object, or NULL if an error occurred.
*
* @details
* Constructs a new Connection object via <b>new</b>. Then tries to connect
* it to the database. If the connection attempt fails, the connection is
* destroyed and NULL is returned instead.
*/
DCDB::Connection* connectToDatabase(const char* hostname, uint16_t port);
/**
* @brief Disconnect and destroy a DCDB::Connection object.
*
* @param conn Pointer to the Connection, which shall be destroyed.
* @return Always returns DCDB_C_OK.
*
* @details
* First, if the Connection object pointed to by conn is still connected, it is
* disconnected from the database. Afterwards the object is deconstructed via
* <b>delete</b>. This function is guaranteed to always succeed.
*
*/
DCDB_C_RESULT disconnectFromDatabase(DCDB::Connection* conn);
/**
* @brief Construct a new DCDB::JobDataStore object.
*
* @param conn Database connection, required to access the database.
*
* @return Pointer to the new JobDataStore object, or NULL if an error
* occurred.
*
* @details
* Construct a new JobDataStore object via <b>new</b>. The
* given Connection object must already be connected to the database,
* otherwise later JobDataStore operations will fail.
*/
DCDB::JobDataStore* constructJobDataStore(DCDB::Connection* conn);
/**
* @brief Insert a starting job into the database.
*
* @param jds Pointer to JobDataStore object, which shall be used to
* insert the job.
* @param jid SLURM id of the job.
* @param uid SLURM user id of the job owner.
* @param startTs Start time of the job (in ns since Unix epoch).
* @param nodes String array of node names used by the job.
* @param nodeSize Size of the nodes array.
*
* @return DCDB_C_OK if the job was successfully inserted. DCDB_C_CONNERR if
* jds is NULL. DCDB_C_BADPARAMS if the given parameters were
* illogical. DCDB_C_UNKNOWN otherwise.
*
* @details
* Builds a JobData struct from (jid, uid, startTs, nodes, nodeSize) with the
* end time set to 0 and then tries to insert it by calling the corresponding
* insert function of jds.
*/
DCDB_C_RESULT insertJobStart(DCDB::JobDataStore* jds, DCDB::JobId jid,
DCDB::UserId uid, uint64_t startTs,
const char ** nodes, unsigned nodeSize);
/**
* @brief Update the end time of the most recent job with Id jid.
*
* @param jds Pointer to JobDataStore object, which shall be used to insert
* the job.
* @param jid SLURM id of the job.
* @param endTs End time of the job (in ns since Unix epoch).
*
* @return DCDB_C_OK if the job was successfully updated. DCDB_C_CONNERR if
* jds is NULL. DCDB_C_BADPARAMS if no job with the given JobId
* exists. DCDB_C_UNKNOWN otherwise.
*/
DCDB_C_RESULT updateJobEnd(DCDB::JobDataStore* jds, DCDB::JobId jid,
uint64_t endTs);
/**
* @brief For Debugging. Print the jobdata or an appropriate error message.
*
* @param jds Pointer to JobDataStore object, which to query for the JobId.
* @param jid SLURM id of the job to be printed.
*
* @return DCDB_C_OK.
*
* @details
* For fast testing if a job was inserted correctly, this method allows to
* query the datastore for the job and print its data. If the job could not
* be found or another error was encountered, an appropriate error message is
* printed.
*/
DCDB_C_RESULT printJob(DCDB::JobDataStore* jds, DCDB::JobId jid);
/**
* @brief Destroy a DCDB::JobDataStore object.
*
* @param jds Pointer to the JobDataStore, which shall be destroyed.
* @return Always returns DCDB_C_OK.
*
* @details
* The object is deconstructed via <b>delete</b>. This function is guaranteed
* to always succeed. NOTE: Does not delete the connection object, which was
* given to the JobDataStore at construction time. Use disconnectFromDatabase()
* to destroy the Connection object.
*
*/
DCDB_C_RESULT destructJobDataStore(DCDB::JobDataStore* jds);
#ifdef __cplusplus
}
#endif
......
//================================================================================
// Name : jobdatastore.h
// Author : Axel Auweter, Micha Mueller
// Copyright : Leibniz Supercomputing Centre
// Description : C++ API for inserting and querying DCDB job data.
//================================================================================
//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2011-2018 Leibniz Supercomputing Centre
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//================================================================================
/**
* @file
* @brief This file contains parts of the public API for the libdcdb library.
* It contains the class definition of the JobDataStore class, that handles
* database operations for job data.
*/
#ifndef DCDB_JOBDATASTORE_H
#define DCDB_JOBDATASTORE_H
#include <cstdint>
#include <list>
#include <string>
#include "cassandra.h"
#include "connection.h"
#include "timestamp.h"
namespace DCDB {
/* Forward-declaration of the implementation-internal classes */
class JobDataStoreImpl;
using JobId = uint32_t;
using UserId = uint32_t;
using NodeList = std::list<std::string>;
/**
* @brief This struct is a container for the information DCDB keeps about
* SLURM jobs. Both jobId and startTime are required to uniquely identify a
* job; together they form the primary key used by the JobDataStore
* functions.
*/
struct JobData {
JobId jobId; /**< SLURM job id of the job. Part of the primary key. */
UserId userId; /**< Id of the user who submitted the job. */
TimeStamp startTime;/**< Time when the job started (started != submitted)
in ns since Unix epoch. Part of the primary key. */
TimeStamp endTime; /**< Time when the job finished in ns since Unix
epoch. The C API function insertJobStart()
initialises this to 0. */
NodeList nodes; /**< List of names of the nodes the job occupied. */
/* extend as required */
};
/**
* @brief Result codes returned by the JobDataStore member functions.
*/
typedef enum {
JD_OK, /**< Everything went fine. */
JD_JOBKEYNOTFOUND,/**< No job with a matching primary key was found. */
JD_JOBIDNOTFOUND, /**< The given JobId was not found in the data store. */
JD_BADPARAMS, /**< The provided parameters are ill-formed. Either
because they are erroneous or incomplete. */
JD_PARSINGERROR, /**< Data retrieved from the data store could not be
parsed and a default value was returned instead.
Use results with care and at your own risk. */
JD_UNKNOWNERROR /**< An unknown error occurred. */
} JDError;
/**
* @brief JobDataStore is the class of the libdcdb library
* to write and read job data.
*/
class JobDataStore {
public:
    /* ---------------------------------------------------------------- */
    /* Construction / destruction                                       */
    /* ---------------------------------------------------------------- */

    /**
     * @brief Create a JobDataStore that accesses the data store through a
     *        connection which has already been established.
     *
     * @param conn  Connection object of an established connection to
     *              Cassandra.
     */
    JobDataStore(Connection* conn);

    /**
     * @brief Standard destructor of a JobDataStore object.
     */
    virtual ~JobDataStore();

    /* ---------------------------------------------------------------- */
    /* Writing job data                                                 */
    /* ---------------------------------------------------------------- */

    /**
     * @brief Insert a single job into the database.
     *
     * @param jdata  JobData object describing the job. jobId and startTime
     *               form the primary key and therefore have to be set; the
     *               remaining values may be left out and supplied later
     *               via updateJob().
     * @return See JDError.
     */
    JDError insertJob(JobData& jdata);

    /**
     * @brief Update a job.
     *
     * @details The job whose primary key matches jdata is updated with all
     *          values of the JobData struct. If no such entry exists, a
     *          new one is created (upsert).
     *
     * @param jdata  JobData object filled with all information about the
     *               job.
     * @return See JDError.
     */
    JDError updateJob(JobData& jdata);

    /**
     * @brief Set a new end time for the job with matching primary key.
     *
     * @param jobId    JobId of the job to be updated. Part of the primary
     *                 key.
     * @param startTs  Start time of the job. Part of the primary key.
     * @param endTime  New end time to be stored.
     * @return See JDError.
     */
    JDError updateEndtime(JobId jobId, TimeStamp startTs,
                          TimeStamp endTime);

    /**
     * @brief Delete a job from the job data list.
     *
     * @param jid      JobId of the job. Part of the primary key.
     * @param startTs  Start timestamp of the job. Part of the primary key.
     * @return See JDError.
     */
    JDError deleteJob(JobId jid, TimeStamp startTs);

    /* ---------------------------------------------------------------- */
    /* Reading job data                                                 */
    /* ---------------------------------------------------------------- */

    /**
     * @brief Look up a job by its primary key.
     *
     * @param job      JobData object that will be populated with the job
     *                 data.
     * @param jid      Id of the job to retrieve. Part of the primary key.
     * @param startTs  Start time of the job. Part of the primary key.
     * @return See JDError.
     */
    JDError getJobByPrimaryKey(JobData& job, JobId jid, TimeStamp startTs);

    /**
     * @brief Look up the most recent job with the given id.
     *
     * @param job  JobData object that will be populated with the job data.
     * @param jid  Id of the job to retrieve. If several jobs share this
     *             id, the most recent one is returned.
     * @return See JDError.
     */
    JDError getJobById(JobData& job, JobId jid);

    /**
     * @brief Retrieve the jobs which ran entirely within a time interval.
     *
     * @details EXCLUSIVE version; only jobs whose start AND end time lie
     *          within the interval are returned. See also
     *          getJobsInIntervalIncl().
     *
     * @param jobs           List of JobData that will be populated with
     *                       the jobs.
     * @param intervalStart  Start time of the interval.
     * @param intervalEnd    End time of the interval.
     * @return See JDError.
     */
    JDError getJobsInIntervalExcl(std::list<JobData>& jobs,
                                  TimeStamp intervalStart,
                                  TimeStamp intervalEnd);

    /**
     * @brief Retrieve the jobs which touched a time interval.
     *
     * @details INCLUSIVE version; all jobs whose start OR end time lies
     *          within the interval are returned. See also
     *          getJobsInIntervalExcl().
     *
     * @param jobs           List of JobData that will be populated with
     *                       the jobs.
     * @param intervalStart  Start time of the interval.
     * @param intervalEnd    End time of the interval.
     * @return See JDError.
     */
    JDError getJobsInIntervalIncl(std::list<JobData>& jobs,
                                  TimeStamp intervalStart,
                                  TimeStamp intervalEnd);

    /**
     * @brief Retrieve the list of nodes which were used by a job.
     *
     * @param nodes    NodeList that will be populated with the nodes.
     * @param jid      Id of the job whose nodes should be retrieved.
     * @param startTs  Start timestamp of the job; completes the primary
     *                 key.
     * @return See JDError.
     */
    JDError getNodeList(NodeList& nodes, JobId jid, TimeStamp startTs);

private:
    JobDataStoreImpl* impl; /**< Pointer to the implementation-internal
                                 JobDataStoreImpl object. */
};
} /* End of namespace DCDB */
#endif /* DCDB_JOBDATASTORE_H */
......@@ -116,7 +116,7 @@ protected:
* @param primaryKey A primary key definition (one or more fields)
* @param options A Cassandra WITH statement for keyspace generation
*/
void createColumnFamily(std::string name, std::string fields, std::string primaryKey, std::string options);
void createColumnFamily(std::string name, std::string fields, std::string primaryKey, std::string options = "");
public:
......
......@@ -33,6 +33,11 @@
#ifndef DCDB_GLOBALS_H
#define DCDB_GLOBALS_H
/* Legend:
* CF = Column Family
* JD = Job Data
*/
#define KEYSPACE_NAME "dcdb"
#define CF_SENSORDATA "sensordata"
#define SENSORDATA_GC_GRACE_SECONDS "600"
......@@ -41,4 +46,7 @@
#define CF_PUBLISHEDSENSORS "publishedsensors"
#define CF_VIRTUALSENSORS "virtualsensors"
#define JD_KEYSPACE_NAME KEYSPACE_NAME "_jobdata"
#define CF_JOBDATA "jobdata"
#endif /* DCDB_GLOBALS_H */
//================================================================================
// Name : jobdatastore_internal.h
// Author : Axel Auweter, Micha Mueller
// Copyright : Leibniz Supercomputing Centre
// Description : Internal interface for inserting and querying DCDB job data.
//================================================================================
//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2011-2018 Leibniz Supercomputing Centre
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//================================================================================
/**
* @file
* @brief This file contains the internal functions of the
* Job Data Store which are provided by the
* JobDataStoreImpl class.
*/
#ifndef DCDB_JOBDATASTORE_INTERNAL_H
#define DCDB_JOBDATASTORE_INTERNAL_H
#include <list>
#include <string>
#include "dcdb/jobdatastore.h"
#include "dcdb/connection.h"
#include "dcdb/timestamp.h"
namespace DCDB {
/**
* @brief The JobDataStoreImpl class contains all protected
* functions belonging to JobDataStore which are
* hidden from the user of the libdcdb library.
*/
class JobDataStoreImpl {
public:
    JobDataStoreImpl(Connection* conn);
    virtual ~JobDataStoreImpl();

    /*
     * The functions below mirror the public interface of JobDataStore;
     * see jobdatastore.h for their documentation.
     */

    /* Writing job data */
    JDError insertJob(JobData& jdata);
    JDError updateJob(JobData& jdata);
    JDError updateEndtime(JobId jobId, TimeStamp startTs, TimeStamp endTime);
    JDError deleteJob(JobId jid, TimeStamp startTs);

    /* Reading job data */
    JDError getJobByPrimaryKey(JobData& job, JobId jid, TimeStamp startTs);
    JDError getJobById(JobData& job, JobId jid);
    JDError getJobsInIntervalExcl(std::list<JobData>& jobs,
                                  TimeStamp intervalStart,
                                  TimeStamp intervalEnd);
    JDError getJobsInIntervalIncl(std::list<JobData>& jobs,
                                  TimeStamp intervalStart,
                                  TimeStamp intervalEnd);
    JDError getNodeList(NodeList& nodes, JobId jid, TimeStamp startTs);

protected:
    /** The Connection object that does the low-level work. */
    Connection* connection;
    /** The CassSession object given by the connection. */
    CassSession* session;
    /** The prepared statement used for fast insertions. */
    const CassPrepared* preparedInsert;

    /**
     * @brief Prepare for insertions.
     *
     * @param ttl  TTL that will be set for newly inserted values. Pass 0
     *             to insert without a TTL.
     */
    void prepareInsert(uint64_t ttl);
};
} /* End of namespace DCDB */
#endif /* DCDB_JOBDATASTORE_INTERNAL_H */
//================================================================================
// Name : c_api.cpp
// Author : Axel Auweter, Daniele Tafani
// Author : Axel Auweter, Daniele Tafani, Micha Mueller
// Copyright : Leibniz Supercomputing Centre
// Description : C API Implementation for libdcdb
//================================================================================
//================================================================================
// This file is part of DCDB (DataCenter DataBase)
// Copyright (C) 2011-2016 Leibniz Supercomputing Centre
// Copyright (C) 2011-2018 Leibniz Supercomputing Centre
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
......@@ -37,6 +37,7 @@
#include "dcdb/connection.h"
#include "dcdb/sensorconfig.h"
#include "dcdb/sensordatastore.h"
#include "dcdb/jobdatastore.h"
using namespace DCDB;
......@@ -433,3 +434,132 @@ DCDB_C_RESULT dcdbQuerySumMultipleThreaded(
delete connection;
return error;
}
/**
 * Create a Connection object for the given database node and connect it.
 *
 * @param hostname  Hostname of the database node. Must not be NULL.
 * @param port      TCP port used to connect to the database node.
 * @return Pointer to the connected Connection object, or NULL if hostname
 *         is NULL or the connection attempt failed. The returned object is
 *         allocated with new; release it with disconnectFromDatabase().
 */
Connection* connectToDatabase(const char* hostname, uint16_t port) {
    /*
     * Reject a missing hostname up front instead of handing a null pointer
     * to the Connection constructor.
     * NOTE(review): the constructor presumably takes a std::string, for
     * which a null pointer is undefined behaviour - confirm against
     * connection.h.
     */
    if (hostname == nullptr) {
        return nullptr;
    }

    Connection* conn = new Connection(hostname, port);
    if (!conn->connect()) {
        /* The connection attempt failed: do not leak the object. */
        delete conn;
        return nullptr;
    }

    /* The original code had a commented-out initSchema() call at this point; no schema initialisation is performed here. */
    return conn;
}
/**
 * Disconnect a Connection object from the database and delete it.
 *
 * @param conn  Connection to tear down. A NULL pointer is accepted and
 *              ignored.
 * @return Always DCDB_C_OK.
 */
DCDB_C_RESULT disconnectFromDatabase(Connection* conn) {
    /* Nothing to tear down for a null pointer. */
    if (conn == nullptr) {
        return DCDB_C_OK;
    }

    conn->disconnect();
    delete conn;
    return DCDB_C_OK;
}
/**
 * Allocate a new JobDataStore that uses the given connection.
 *
 * @param conn  Connection handed to the JobDataStore constructor.
 * @return Pointer to the new JobDataStore object, or NULL if conn is NULL.
 */
JobDataStore* constructJobDataStore(Connection* conn) {
    return (conn != nullptr) ? new JobDataStore(conn) : nullptr;
}
/**
 * Insert a starting job into the database.
 *
 * Builds a JobData record from the arguments, with the end time set to 0,
 * and hands it to JobDataStore::insertJob().
 *
 * @param jds       JobDataStore used to insert the job.
 * @param jid       Id of the job.
 * @param uid       Id of the job owner.
 * @param startTs   Start time of the job.
 * @param nodes     Array of nodeSize C strings holding the node names. May
 *                  only be NULL if nodeSize is 0; no entry may be NULL.
 * @param nodeSize  Number of entries in nodes.
 * @return DCDB_C_CONNERR if jds is NULL. DCDB_C_BADPARAMS if nodes or one
 *         of its entries is NULL, or if the data store reports
 *         JD_BADPARAMS. DCDB_C_OK if the data store reports JD_OK.
 *         DCDB_C_UNKNOWN otherwise.
 */
DCDB_C_RESULT insertJobStart(JobDataStore* jds, JobId jid, UserId uid,
                             uint64_t startTs, const char ** nodes,
                             unsigned nodeSize) {
    if (!jds) {
        return DCDB_C_CONNERR;
    }

    /* A non-empty node list needs an array to read from. */
    if (nodeSize > 0 && nodes == nullptr) {
        return DCDB_C_BADPARAMS;
    }

    JobData jdata;
    jdata.jobId = jid;
    jdata.userId = uid;
    jdata.startTime = startTs;
    /* The end time is set to 0 on insertion; updateJobEnd() stores it later. */
    jdata.endTime = static_cast<uint64_t>(0);

    for (unsigned i = 0; i < nodeSize; ++i) {
        /* Building a std::string from a null pointer is undefined behaviour. */
        if (nodes[i] == nullptr) {
            return DCDB_C_BADPARAMS;
        }
        jdata.nodes.push_back(nodes[i]);
    }

    /* Translate the data store result into the C API result codes. */
    switch (jds->insertJob(jdata)) {
        case JD_OK:
            return DCDB_C_OK;
        case JD_BADPARAMS:
            return DCDB_C_BADPARAMS;
        default:
            return DCDB_C_UNKNOWN;
    }
}
/**
 * Store the end time of the job that getJobById() returns for jid.
 *
 * @param jds    JobDataStore used to look up and update the job.
 * @param jid    Id of the job.
 * @param endTs  End time of the job.
 * @return DCDB_C_CONNERR if jds is NULL. DCDB_C_BADPARAMS if the lookup
 *         reports JD_JOBIDNOTFOUND. DCDB_C_UNKNOWN if the lookup or the
 *         update fails for any other reason. DCDB_C_OK on success.
 */
DCDB_C_RESULT updateJobEnd(JobDataStore* jds, JobId jid, uint64_t endTs) {
    if (jds == nullptr) {
        return DCDB_C_CONNERR;
    }

    /* The start time of the job is needed to complete the primary key. */
    JobData current;
    switch (jds->getJobById(current, jid)) {
        case JD_OK:
            break;
        case JD_JOBIDNOTFOUND:
            return DCDB_C_BADPARAMS;
        default:
            /* Every other lookup failure is reported as unknown error. */
            return DCDB_C_UNKNOWN;
    }

    const JDError updated = jds->updateEndtime(jid, current.startTime, endTs);
    return (updated == JD_OK) ? DCDB_C_OK : DCDB_C_UNKNOWN;
}
DCDB_C_RESULT