/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Benjamin Segovia
*/
/**
* \file program.hpp
* \author Benjamin Segovia
*/
#ifndef __GBE_PROGRAM_HPP__
#define __GBE_PROGRAM_HPP__
#include "backend/program.h"
#include "backend/context.hpp"
#include "ir/constant.hpp"
#include "ir/unit.hpp"
#include "ir/function.hpp"
#include "ir/printf.hpp"
#include "ir/sampler.hpp"
#include "sys/vector.hpp"
#include <string>
namespace gbe {
namespace ir {
// Forward declaration of the compilation unit, i.e. the object that contains
// the program to compile. Only the name is introduced here.
class Unit;
} /* namespace ir */
} /* namespace gbe */
namespace gbe {
/*! Info for one kernel argument: its kind, size, alignment and binding table
 *  index, plus the descriptive fields grouped in ArgInfo. */
struct KernelArgument {
gbe_arg_type type; //!< Pointer, structure, image, regular value?
uint32_t size; //!< Size of the argument
uint32_t align; //!< addr alignment of the argument
uint8_t bti; //!< binding table index for __global buffer
// Descriptive information about the argument (mostly strings).
// NOTE(review): the exact encoding of each field is defined by the code that
// fills it in, which is not visible in this header.
struct ArgInfo {
uint32_t addrSpace; //!< Address space of the argument
std::string typeName; //!< Name of the argument type
std::string accessQual; //!< Access qualifier of the argument
std::string typeQual; //!< Type qualifier of the argument
std::string argName; //!< Name of the argument
uint32_t typeSize; //!< Size of the argument type (presumably in bytes - confirm)
};
ArgInfo info; //!< Descriptive information attached to this argument
};
/*! Stores the offset where a value has to be patched, identified by its
 *  (type, subType) pair. The three fields are bit-fields that add up to
 *  64 bits. */
struct PatchInfo {
  /*! Build a patch entry. Every value is stored in a bit-field, so it is
   *  truncated to the width of that field (16 bits for type and offset,
   *  32 bits for subType). */
  INLINE PatchInfo(gbe_curbe_type type, uint32_t subType = 0u, uint32_t offset = 0u) :
    type(uint32_t(type)), subType(subType), offset(offset) {}
  /*! Build an all-zero entry. The fields are explicitly cleared so that a
   *  default-constructed entry never carries indeterminate values (e.g. when
   *  it is compared with operator< before being filled in). */
  INLINE PatchInfo(void) : type(0u), subType(0u), offset(0u) {}
  uint64_t type : 16;    //!< Type of the patch (see program.h for the list)
  uint64_t subType : 32; //!< Optional sub-type of the patch (see program.h)
  uint64_t offset : 16;  //!< Optional offset to encode
};
/*! Strict ordering used to sort PatchInfo entries (and binary search them):
 *  lexicographic on (type, subType). The offset does not take part in the
 *  comparison. */
INLINE bool operator< (PatchInfo i0, PatchInfo i1) {
  return i0.type < i1.type ||
         (i0.type == i1.type && i0.subType < i1.subType);
}
/*! Describe a compiled kernel */
class Kernel : public NonCopyable, public Serializable
{
public:
/*! Create an empty kernel with the given name */
Kernel(const std::string &name);
/*! Destroy it */
virtual ~Kernel(void);
/*! Return the instruction stream (to be implemented) */
virtual const char *getCode(void) const = 0;
/*! Set the instruction stream.*/
virtual void setCode(const char *, size_t size) = 0;
/*! Return the instruction stream size (to be implemented) */
virtual uint32_t getCodeSize(void) const = 0;
/*! Get the kernel name */
INLINE const char *getName(void) const { return name.c_str(); }
/*! Return the number of arguments for the kernel call */
INLINE uint32_t getArgNum(void) const { return argNum; }
/*! Return the size of the given argument */
INLINE uint32_t getArgSize(uint32_t argID) const {
return argID >= argNum ? 0u : args[argID].size;
}
/*! Return the bti for __global buffer */
INLINE uint8_t getArgBTI(uint32_t argID) const {
return argID >= argNum ? 0u : args[argID].bti;
}
/*! Return the alignment of buffer argument */
INLINE uint32_t getArgAlign(uint32_t argID) const {
return argID >= argNum ? 0u : args[argID].align;
}
/*! Return the type of the given argument */
INLINE gbe_arg_type getArgType(uint32_t argID) const {
return argID >= argNum ? GBE_ARG_INVALID : args[argID].type;
}
/*! Get the offset where to patch. Returns -1 if no patch needed */
int32_t getCurbeOffset(gbe_curbe_type type, uint32_t subType) const;
/*! Get the curbe size required by the kernel */
INLINE uint32_t getCurbeSize(void) const { return this->curbeSize; }
/*! Return the size of the stack (zero if none) */
INLINE uint32_t getStackSize(void) const { return this->stackSize; }
/*! Return the size of the scratch memory needed (zero if none) */
INLINE uint32_t getScratchSize(void) const { return this->scratchSize; }
/*! Get the SIMD width for the kernel */
INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
/*! Says if SLM is needed for it */
INLINE bool getUseSLM(void) const { return this->useSLM; }
/*! get slm size for kernel local variable */
INLINE uint32_t getSLMSize(void) const { return this->slmSize; }
/*! Return the OpenCL version */
INLINE void setOclVersion(uint32_t version) { this->oclVersion = version; }
INLINE uint32_t getOclVersion(void) const { return this->oclVersion; }
/*! Set sampler set. */
void setSamplerSet(ir::SamplerSet *from) {
samplerSet = from;
}
/*! Get defined sampler size */
size_t getSamplerSize(void) const { return (samplerSet == NULL ? 0 : samplerSet->getDataSize()); }
/*! Get defined sampler value array */
void getSamplerData(uint32_t *samplers) const { samplerSet->getData(samplers); }
/*! Set image set. */
void setImageSet(ir::ImageSet * from) {
imageSet = from;
}
/*! Set profiling info. */
void setProfilingInfo(ir::ProfilingInfo * from) {
profilingInfo = from;
}
void * dupProfilingInfo() const {
void* ptr = profilingInfo ? (void *)(new ir::ProfilingInfo(*profilingInfo)) : NULL;
return ptr;
}
uint32_t getProfilingBTI(void) const {
return profilingInfo ? profilingInfo->getBTI() : 0;
}
/*! Set printf set. */
void setPrintfSet(ir::PrintfSet * from) {
printfSet = from;
}
uint32_t getPrintfNum() const {
return printfSet ? printfSet->getPrintfNum() : 0;
}
void * dupPrintfSet() const {
void* ptr = printfSet ? (void *)(new ir::PrintfSet(*printfSet)) : NULL;
return ptr;
}
uint8_t getPrintfBufBTI() const {
GBE_ASSERT(printfSet);
return printfSet->getBufBTI();
}
uint32_t getProfilingBufBTI() const {
GBE_ASSERT(profilingInfo);
return profilingInfo->getBTI();
}
void outputProfilingInfo(void* buf) {
if(profilingInfo)
profilingInfo->outputProfilingInfo(buf);
}
KernelArgument::ArgInfo* getArgInfo(uint32_t id) const { return &args[id].info; }
/*! Set compile work group size */
void setCompileWorkGroupSize(const size_t wg_sz[3]) {
compileWgSize[0] = wg_sz[0];
compileWgSize[1] = wg_sz[1];
compileWgSize[2] = wg_sz[2];
}
/*! Get compile work group size */
void getCompileWorkGroupSize (size_t wg_sz[3]) const {
wg_sz[0] = compileWgSize[0];
wg_sz[1] = compileWgSize[1];
wg_sz[2] = compileWgSize[2];
}
/*! Set function attributes string. */
void setFunctionAttributes(const std::string& functionAttributes) { this->functionAttributes= functionAttributes; }
/*! Get function attributes string. */
const char* getFunctionAttributes(void) const {return this->functionAttributes.c_str();}
/*! Get defined image size */
size_t getImageSize(void) const { return (imageSet == NULL ? 0 : imageSet->getDataSize()); }
/*! Get defined image value array */
void getImageData(ImageInfo *images) const { imageSet->getData(images); }
static const uint32_t magic_begin = TO_MAGIC('K', 'E', 'R', 'N');
static const uint32_t magic_end = TO_MAGIC('N', 'R', 'E', 'K');
/* format:
magic_begin |
name_size |
name |
arg_num |
args |
PatchInfo_num |
PatchInfo |
curbeSize |
simdWidth |
stackSize |
scratchSize |
useSLM |
slmSize |
samplers |
images |
code_size |
code |
magic_end
*/
/*! Implements the serialization. */
virtual uint32_t serializeToBin(std::ostream& outs);
virtual uint32_t deserializeFromBin(std::istream& ins);
virtual void printStatus(int indent, std::ostream& outs);
/*! Does kernel use device enqueue */
INLINE bool getUseDeviceEnqueue(void) const { return this->useDeviceEnqueue; }
/*! Change the device enqueue info of the function */
INLINE bool setUseDeviceEnqueue(bool useDeviceEnqueue) {
return this->useDeviceEnqueue = useDeviceEnqueue;
}
protected:
friend class Context; //!< Owns the kernels
friend class GenContext;
std::string name; //!< Kernel name
KernelArgument *args; //!< Each argument
vector patches; //!< Indicates how to build the curbe
uint32_t argNum; //!< Number of function arguments
uint32_t curbeSize; //!< Size of the data to push
uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
uint32_t stackSize; //!< Stack size (0 if unused)
uint32_t scratchSize; //!< Scratch memory size (may be 0 if unused)
uint32_t oclVersion; //!< Opencl Version (120 for 1.2, 200 for 2.0)
bool useSLM; //!< SLM requires a special HW config
uint32_t slmSize; //!< slm size for kernel variable
Context *ctx; //!< Save context after compiler to alloc constant buffer curbe
ir::SamplerSet *samplerSet;//!< Copy from the corresponding function.
ir::ImageSet *imageSet; //!< Copy from the corresponding function.
ir::PrintfSet *printfSet; //!< Copy from the corresponding function.
ir::ProfilingInfo *profilingInfo; //!< Copy from the corresponding function.
uint32_t compileWgSize[3]; //!< required work group size by kernel attribute.
std::string functionAttributes; //!< function attribute qualifiers combined.
bool useDeviceEnqueue; //!< Has device enqueue?
GBE_CLASS(Kernel); //!< Use custom allocators
};
/*! Describe a compiled program: a set of kernels indexed by name, together
 *  with the global constant set, the relocation table and the names of the
 *  device enqueue functions. */
class Program : public NonCopyable, public Serializable
{
public:
  /*! Create an empty program */
  Program(uint32_t fast_relaxed_math);
  /*! Destroy the program */
  virtual ~Program(void);
  /*! Clean LLVM resource of the program */
  virtual void CleanLlvmResource() = 0;
  /*! Get the number of kernels in the program */
  uint32_t getKernelNum(void) const { return uint32_t(kernels.size()); }
  /*! Get the kernel from its name (NULL if there is no such kernel) */
  Kernel *getKernel(const std::string &name) const {
    map<std::string, Kernel*>::const_iterator it = kernels.find(name);
    if (it == kernels.end())
      return NULL;
    return it->second;
  }
  /*! Get the kernel from its ID, i.e. its position when iterating over the
   *  kernels, which are sorted by name (NULL if ID is out of range) */
  Kernel *getKernel(uint32_t ID) const {
    if (ID >= kernels.size())
      return NULL;
    map<std::string, Kernel*>::const_iterator it = kernels.begin();
    for (uint32_t currID = 0; currID < ID; ++currID)
      ++it;
    return it->second;
  }
  /*! Get the name of the device enqueue function at the given index (NULL if
   *  index is out of range) */
  const char *getDeviceEnqueueKernelName(uint32_t index) const {
    if(index >= blockFuncs.size())
      return NULL;
    return blockFuncs[index].c_str();
  }
  /*! Build a program from a ir::Unit */
  bool buildFromUnit(const ir::Unit &unit, std::string &error);
  /*! Builds a program from a LLVM source code */
  bool buildFromLLVMFile(const char *fileName, const void* module, std::string &error, int optLevel);
  /*! Builds a program from a OCL string */
  bool buildFromSource(const char *source, std::string &error);
  /*! Get size of the global constant arrays (0 if there is no constant set;
   *  the serialized format carries a flag for it, so it may be absent) */
  size_t getGlobalConstantSize(void) const {
    return (constantSet == NULL ? 0 : constantSet->getDataSize());
  }
  /*! Get the content of global constant arrays. Does nothing if there is no
   *  constant set. */
  void getGlobalConstantData(char *mem) const {
    if (constantSet != NULL)
      constantSet->getData(mem);
  }
  /*! Get the number of entries of the relocation table (0 if there is no
   *  relocation table) */
  uint32_t getGlobalRelocCount(void) const {
    return (relocTable == NULL ? 0 : relocTable->getCount());
  }
  /*! Get the content of the relocation table. Does nothing if there is no
   *  relocation table. */
  void getGlobalRelocTable(char *p) const {
    if (relocTable != NULL)
      relocTable->getData(p);
  }
  static const uint32_t magic_begin = TO_MAGIC('P', 'R', 'O', 'G');
  static const uint32_t magic_end = TO_MAGIC('G', 'O', 'R', 'P');
  /* format:
     magic_begin       |
     constantSet_flag  |
     constSet_data     |
     kernel_num        |
     kernel_1          |
     ........          |
     kernel_n          |
     magic_end         |
     total_size
  */
  /*! Implements the serialization. */
  virtual uint32_t serializeToBin(std::ostream& outs);
  virtual uint32_t deserializeFromBin(std::istream& ins);
  virtual void printStatus(int indent, std::ostream& outs);
  /*! Single bit flag given to the constructor */
  uint32_t fast_relaxed_math : 1;
protected:
  /*! Compile a kernel */
  virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name,
                                bool relaxMath, int profiling) = 0;
  /*! Allocate an empty kernel. */
  virtual Kernel *allocateKernel(const std::string &name) = 0;
  /*! Kernels sorted by their name */
  map<std::string, Kernel*> kernels;
  /*! Global (constants) outside any kernel */
  ir::ConstantSet *constantSet;
  /*! relocation table */
  ir::RelocTable *relocTable;
  /*! device enqueue functions */
  vector<std::string> blockFuncs;
  /*! Use custom allocators */
  GBE_CLASS(Program);
};
} /* namespace gbe */
#endif /* __GBE_PROGRAM_HPP__ */