Download: sally-0.6.1-with-bindings.tar.gz
One of the tools I have used recently in my machine-learning projects is Sally. As Sally’s web page describes it: “There are many applications for Sally, for example, in the areas of natural language processing, bioinformatics, information retrieval and computer security”. You can look at the example page to see more details. It is written in C, which makes it fast, but, as is usually the case, being able to use a tool like this directly from Python would make life easier. It would make for faster prototyping and system development, and since it is a tool that I think I will be using repeatedly in the future, I gave the library Python bindings. In this post I would like to outline the technique I use to create a Python module from a C library. I use SWIG for the bindings, and you will need to be somewhat familiar with SWIG to follow the rest of this post.
/* Minimal SWIG interface file: builds a module named "example" from example.h. */
%module example
%{
/* Everything between %{ and %} is copied verbatim into the generated wrapper source. */
#include "example.h"
%}
/* Have SWIG parse the header and generate wrappers for the declarations it contains. */
%include "example.h"
/* SWIG interface wrapping libconfig, with a helper that returns a looked-up
 * string setting through an output argument. */
%module pysally
%{
#include <libconfig.h>
#include <stdlib.h>
#include <string.h>
%}
%include <cstring.i>
/* Treat `char **out1` as an output string: SWIG converts *out1 to the target
 * language's string type and then releases it with free(). */
%cstring_output_allocate(char **out1, free(*$1));
%{
/* Look up the string setting at `path` and hand back a heap-allocated copy in
 * *out1. On a failed lookup *out1 is an empty string.
 *
 * config_lookup_string() stores a pointer to storage owned by libconfig, so
 * the value must be copied: the typemap above calls free() on *out1, and
 * freeing the library's own storage would corrupt the configuration.
 *
 * NOTE(review): this helper sits in a plain %{ %} block, which SWIG only
 * copies into the wrapper; confirm it is also declared to SWIG (or switch to
 * %inline) so that it is actually exposed to Python.
 */
void config_lookup_string_2(const config_t *config, const char *path, char **out1)
{
    const char *value = NULL;

    if (config_lookup_string(config, path, &value) == CONFIG_TRUE && value != NULL) {
        *out1 = strdup(value);
    } else {
        *out1 = strdup("");
    }
}
%}
%include <libconfig.h>
/* SWIG interface for the C++ wrapper around Sally: builds module "pysally" from pysally.h. */
%module pysally
%{
/* Copied verbatim into the generated wrapper source. */
#include "pysally.h"
%}
/* SWIG library file with the typemaps for std::string arguments and return values. */
%include "std_string.i"
/* Parse the header and generate wrappers for the classes it declares. */
%include "pysally.h"
class Sally { public: Sally(int verbose, std::string in, std::string out) : entries_(0), input_(in), output_(out) {} ~Sally(); /// Load the configuration of Sally void load_config(const std::string& config_file); /// Init the Sally tool void init(); /// Main processing routine of Sally. /// This function processes chunks of strings. void process(); /// Get/Set configuration std::string getConfigAttribute(std::string name); void setConfigAttribute(std::string name, std::string value); // etc // ... // ... private: config_t cfg_; int verbose_; long entries_; std::string input_; std::string output_; };
# Minimal use of the wrapped Sally class from Python.
# `in` is a reserved keyword in Python and cannot be used as a variable name,
# so the input/output paths get descriptive names instead.
verbose = 0
input_path = "/tmp/input"
output_path = "/tmp/output"
config = "/tmp/sally.cfg"

s = Sally(verbose, input_path, output_path)
s.load_config(config)
s.init()
s.process()
/* SWIG interface for pysally with directors enabled, so that Reader and
 * Writer can be subclassed in Python and their virtual methods overridden. */
%module(directors="1") pysally
%{
/* Copied verbatim into the generated wrapper source. */
#include "pysally.h"
%}
/* Turn the director feature on for the two classes meant to be extended from Python. */
%feature("director") Reader;
%feature("director") Writer;
/* SWIG library file with the typemaps for std::string arguments and return values. */
%include "std_string.i"
%include "pysally.h"
/// Output stage of the pipeline. Every operation is virtual so that a
/// subclass can replace it.
class Writer {
public:
    /// @param out  output location
    Writer(std::string out);
    virtual ~Writer();

    /// Prepare the writer.
    virtual void init();

    /// Name of this writer.
    virtual const std::string getName();

    /// Write `len` entries taken from `output`.
    virtual int write(const output_list& output, int len);

private:
    config_t& cfg_;
    std::string output_;
    bool hasout_;
};

/// Input stage of the pipeline. Every operation is virtual so that a
/// subclass can replace it.
class Reader {
public:
    /// @param in  input location
    Reader(std::string in);
    virtual ~Reader();

    /// Prepare the reader.
    virtual void init();

    /// Name of this reader.
    virtual const std::string getName();

    /// Number of entries.
    virtual long getNrEntries();

    /// Read `len` entries into `strs`.
    virtual int read(string_list& strs, int len);

private:
    config_t& cfg_;
    std::string input_;
    long entries_;
};
# Example: Python subclasses of the wrapped Reader and Writer classes, plugged
# into Sally. The print statements use Python 2 syntax.
# NOTE(review): the indentation of this listing was lost in the original; the
# nesting below (in particular of the print statements in MyWriter.write) is
# the most plausible reconstruction -- confirm against the downloadable sources.
class MyReader(Reader):
    def __init__(self, input):
        super(MyReader, self).__init__(input)

    # The overrides below simply delegate to the base-class implementation.
    def read(self, strings, len):
        return super(MyReader, self).read(strings, len)

    def init(self):
        super(MyReader, self).init()

    def getNrEntries(self):
        return super(MyReader, self).getNrEntries()


class MyWriter(Writer):
    def __init__(self, output):
        super(MyWriter, self).__init__(output)

    # Deliberately does nothing: the base-class init() is not called.
    def init(self):
        pass

    # Prints each of the `len` feature vectors in `fvec` to stdout instead of
    # writing a file, then returns 1.
    def write(self, fvec, len):
        for j in range(len):
            print "l:", fvec.getFeaturesLabel(j),
            for i in range(fvec.getListLength(j)):
                print fvec.getDimension(j, i), fvec.getValue(j, i),
                print fvec.getValue(j, i)
            print fvec.getFeaturesSource(j)
            print
        return 1


input = "/home/edimchr/reuters.zip"
output = "/home/edimchr/tmp/pyreuters.libsvm"
verbose = 0

r = MyReader(input)
w = MyWriter(output)
# Alternative: use the stock reader and writer instead of the subclasses.
#r = Reader(input)
#w = Writer(output)
s = Sally(verbose, r, w)
s.load_config("./example.cfg")
s.init()
s.process()
class string_list { private: string_t* str_; public: string_list(string_t* str) : str_(str) {} /// Length for element i void setStringLength(int i, int len) { str_[i].len = len ; } /// String data for element i void setStringData(int i, char* data) { str_[i].str = strdup(data); } /// Optional label of string void setLabel(int i, float label) { str_[i].label = label; } /// Optional description of source void setSource(int i, char* src) { str_[i].src = strdup(src); } string_t* getString() const { return str_; } }; class output_list { private: fvec_t** vec_; public: output_list(fvec_t** vec) : vec_(vec) {} /// Length for element i unsigned long getListLength(int i) const { return vec_[i]->len; } /// Nr of features for element i unsigned long getTotalFeatures(int i) const { return vec_[i]->total; } /// Label for element i float getFeaturesLabel(int i) const { return vec_[i]->label; } /// List of dimensions j unsigned long getDimension(int i, int j) { return vec_[i]->dim[j]; } /// List of values for element i float getValue(int i, int j) { return vec_[i]->val[j]; } char* getFeaturesSource(int i) const { return vec_[i]->src; } fvec_t** getFvec() const { return vec_; } };
sally-0.6.1/
m4/
ac_pkg_swig.m4
ax_pkg_swig.m4
ax_python_devel.m4
pysally/
Makefile.am
swig.i
src/
Makefile.am
Makefile.am
configure.in
2) Add pysally to SUBDIRS in sally-0.6.1/Makefile.am
……
SUBDIRS = src doc tests contrib pysally
……
……
3) Add the following to sally-0.6.1/configure.in
dnl Checks needed to build the pysally bindings.
dnl C++ compiler.
AC_PROG_CXX
dnl Libtool setup, with static libraries disabled by default.
AC_DISABLE_STATIC
AC_PROG_LIBTOOL
dnl Python development files (2.3 or newer) and the Python interpreter.
AX_PYTHON_DEVEL(>= '2.3')
AM_PATH_PYTHON
dnl SWIG (1.3.21 or newer), with C++ processing and Python as the target.
AC_PROG_SWIG(1.3.21)
SWIG_ENABLE_CXX
SWIG_PYTHON
4) Add pysally/Makefile to AC_CONFIG_FILES in sally-0.6.1/configure.in
dnl Makefiles generated by configure; pysally/Makefile is the new entry.
AC_CONFIG_FILES([
Makefile \
src/Makefile \
src/input/Makefile \
src/output/Makefile \
src/fvec/Makefile \
doc/Makefile \
tests/Makefile \
contrib/Makefile \
pysally/Makefile \
])
sally-0.6.1
0 comments:
Post a Comment