Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "corels/src/corels"]
path = corels/src/corels
url = https://github.com/fingoldin/corels.git
url = https://github.com/alexzheng587/corels.git
2 changes: 2 additions & 0 deletions corels/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.idea
../cython_debug/
3,157 changes: 1,502 additions & 1,655 deletions corels/_corels.cpp

Large diffs are not rendered by default.

208 changes: 116 additions & 92 deletions corels/_corels.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,19 @@ from libc.stdlib cimport malloc, free
from libcpp.vector cimport vector
from libcpp.set cimport set
from libcpp.string cimport string
from libc.stdio cimport printf
import numpy as np
cimport numpy as np
cimport cython

cdef extern from "src/corels/src/rule.hh":
cdef extern from "src/corels/src/rule.h":
ctypedef unsigned long* VECTOR
cdef struct rule:
VECTOR truthtable
char* features
int cardinality
int* ids
int support
int cardinality
int *ids
VECTOR truthtable

ctypedef rule rule_t

Expand All @@ -30,23 +31,18 @@ cdef extern from "src/corels/src/rule.hh":
int count_ones_vector(VECTOR, int)

cdef extern from "src/corels/src/run.hh":
# C++ entry points taken from run.hh. Two API shapes are listed here:
#   * the staged trio run_corels_begin / run_corels_loop / run_corels_end, and
#   * the single-call run_corels, which additionally takes num_threads,
#     max_num_nodes, nmeta, random_seed and the rulelist/classes output vectors.
# NOTE(review): this looks like the old and new sides of a change shown together;
# confirm which of these prototypes run.hh actually still declares.
int run_corels_begin(double c, char* vstring, int curiosity_policy,
int map_type, int ablation, int calculate_size, int nrules, int nlabels,
int nsamples, rule_t* rules, rule_t* labels, rule_t* meta, int freq, char* log_fname,
PermutationMap*& pmap, CacheTree*& tree, Queue*& queue, double& init,
set[string]& verbosity)

int run_corels_loop(size_t max_num_nodes, PermutationMap* pmap, CacheTree* tree, Queue* queue)

double run_corels_end(vector[int]* rulelist, vector[int]* classes, int early, int latex_out, rule_t* rules,
rule_t* labels, char* opt_fname, PermutationMap*& pmap, CacheTree*& tree, Queue*& queue,
double init, set[string]& verbosity)
# Single-call variant: verbosity is a plain int here, not a set[string].
double run_corels(double c, char* vstring, int curiosity_policy,
int map_type, int ablation, int calculate_size, int nrules, int nlabels,
int nsamples, rule_t* rules, rule_t* labels, rule_t* meta, int freq, char* log_fname,
PermutationMap*& pmap, CacheTree*& tree, Queue*& queue, double& init,
int verbosity, int num_threads, int max_num_nodes, int nmeta, int random_seed,
vector[int]* rulelist, vector[int]* classes)

cdef extern from "src/utils.hh":
int mine_rules(char **features, rule_t *samples, int nfeatures, int nsamples,
int max_card, double min_support, rule_t **rules_out, int verbose)
int max_card, double min_support, rule_t **rules_out, int verbose, int pre_mine)

int minority(rule_t* rules, int nrules, rule_t* labels, int nsamples, rule_t* minority_out, int verbose)
int minority(rule_t* rules, int nrules, rule_t* labels, int nsamples, rule_t* minority_out, int verbose, int* minor_count)

cdef extern from "src/corels/src/pmap.hh":
cdef cppclass PermutationMap:
Expand Down Expand Up @@ -145,7 +141,6 @@ cdef rule_t* _to_vector(np.ndarray[np.uint8_t, ndim=2] X, int* ncount_out):

ncount_out[0] = ncount

vectors[i].ids = NULL
vectors[i].features = NULL
vectors[i].cardinality = 1
vectors[i].support = nones
Expand All @@ -155,15 +150,18 @@ cdef rule_t* _to_vector(np.ndarray[np.uint8_t, ndim=2] X, int* ncount_out):
cdef _free_vector(rule_t* vs, int count):
    # Release an array of `count` rule_t entries, then the array itself.
    #
    # For every entry the truthtable is released with rule_vfree(); the `ids`
    # and `features` buffers are handed to free() only when they are non-NULL.
    # A NULL `vs` is accepted and does nothing.
    #
    # The per-step printf tracing that used to live here was debug output: it
    # wrote to stdout for every freed entry (one message had no trailing
    # newline) and has been removed.
    cdef int i

    if vs == NULL:
        return

    for i in range(count):
        rule_vfree(&vs[i].truthtable)

        # Both buffers are optional per entry; this relies on whoever built the
        # entry leaving unused pointers set to NULL.
        if vs[i].ids != NULL:
            free(vs[i].ids)
        if vs[i].features != NULL:
            free(vs[i].features)

    free(vs)

cdef rule_t* rules = NULL
Expand All @@ -180,12 +178,16 @@ def fit_wrap_begin(np.ndarray[np.uint8_t, ndim=2] samples,
np.ndarray[np.uint8_t, ndim=2] labels,
features, int max_card, double min_support, verbosity_str, int mine_verbose,
int minor_verbose, double c, int policy, int map_type, int ablation,
int calculate_size):
int calculate_size, int pre_mine, int num_threads, int max_num_nodes,
np.ndarray[np.uint8_t, ndim=2] minority_list, int random_seed, int freq):
global rules
global labels_vecs
global minor
global n_rules

rules = NULL
labels_vecs = NULL
minor = NULL
cdef int nfeatures = 0
cdef rule_t* samples_vecs = _to_vector(samples, &nfeatures)

Expand Down Expand Up @@ -224,15 +226,15 @@ def fit_wrap_begin(np.ndarray[np.uint8_t, ndim=2] samples,
n_rules = 0

cdef int r = mine_rules(features_vec, samples_vecs, nfeatures, nsamples,
max_card, min_support, &rules, mine_verbose)
max_card, min_support, &rules, mine_verbose, pre_mine)

if features_vec != NULL:
for i in range(nfeatures):
if features_vec[i] != NULL:
free(features_vec[i])
free(features_vec)
features_vec = NULL

if samples_vecs != NULL:
_free_vector(samples_vecs, nsamples)
samples_vecs = NULL
Expand Down Expand Up @@ -283,77 +285,54 @@ def fit_wrap_begin(np.ndarray[np.uint8_t, ndim=2] samples,
raise MemoryError();
strcpy(labels_vecs[0].features, "label=0")
strcpy(labels_vecs[1].features, "label=1")

if minor != NULL:
_free_vector(minor, 1)
minor = NULL

minor = <rule_t*>malloc(sizeof(rule_t))
if minor == NULL:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
labels_vecs = NULL
if rules != NULL:
_free_vector(rules, n_rules)
rules = NULL
n_rules = 0
raise MemoryError();

cdef int mr = minority(rules, n_rules, labels_vecs, nsamples, minor, minor_verbose)
if mr != 0:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
labels_vecs = NULL
if rules != NULL:
_free_vector(rules, n_rules)
rules = NULL
n_rules = 0
raise MemoryError()
cdef int minor_count = 0;
if minority_list[0][0] < 0:
printf("in _to_vector\n")
minor = _to_vector(minority_list, &minor_count)
minor.features = <char*>malloc(9)
strcpy(minor.features, "minority")
printf("minor count is %i\n", minor_count)
else:
minor = <rule_t*>malloc(sizeof(rule_t))
if minor == NULL:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
labels_vecs = NULL
if rules != NULL:
_free_vector(rules, n_rules)
rules = NULL
n_rules = 0
raise MemoryError();

if minority(rules, n_rules, labels_vecs, nsamples, minor, minor_verbose, &minor_count) != 0:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
labels_vecs = NULL
if rules != NULL:
_free_vector(rules, n_rules)
rules = NULL
n_rules = 0
raise MemoryError()
"""
if count_ones_vector(minor[0].truthtable, nsamples) <= 0:
if minor != NULL:
_free_vector(minor, 1)
minor = NULL
"""

cdef int rb = run_corels_begin(c, verbosity, policy, map_type, ablation, calculate_size,
n_rules, 2, nsamples, rules, labels_vecs, minor, 0, NULL, pmap, tree,
queue, init, run_verbosity)

if rb == -1:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
labels_vecs = NULL
if minor != NULL:
_free_vector(minor, 1)
minor = NULL
if rules != NULL:
_free_vector(rules, n_rules)
rules = NULL
n_rules = 0

return False

return True

def fit_wrap_loop(size_t max_nodes):
    """Call run_corels_loop with max_nodes as its max_num_nodes argument.

    The pmap, tree and queue handles come from outside this function.
    Returns True unless run_corels_loop returns -1.
    """
    cdef int status = run_corels_loop(max_nodes, pmap, tree, queue)
    return status != -1

def fit_wrap_end(int early):
global rules
global labels_vecs
global minor
global n_rules

cdef vector[int] rulelist
cdef vector[int] classes
run_corels_end(&rulelist, &classes, early, 0, NULL, NULL, NULL, pmap, tree,
queue, init, run_verbosity)

cdef double rb = run_corels(c, verbosity, policy, map_type, ablation, calculate_size,
n_rules, 2, nsamples, rules, labels_vecs, minor, freq, NULL, pmap, tree,
queue, init, 10, num_threads, max_num_nodes, minor_count, random_seed,
&rulelist, &classes)

r_out = []
print(rulelist.size())
for i in range(rulelist.size()):
if rulelist[i] < n_rules:
r_out.append({})
Expand All @@ -365,18 +344,63 @@ def fit_wrap_end(int early):

r_out.append({ "antecedents": [0], "prediction": bool(classes[rulelist.size()]) })

# Exiting early skips cleanup
if early == 0:
if labels_vecs != NULL:
if rb == -1:
if labels_vecs != NULL:
_free_vector(labels_vecs, 2)
if minor != NULL:
labels_vecs = NULL
if minor != NULL:
_free_vector(minor, 1)
if rules != NULL:
minor = NULL
if rules != NULL:
_free_vector(rules, n_rules)

minor = NULL
rules = NULL
labels_vecs = NULL
n_rules = 0
rules = NULL
n_rules = 0

return Exception

return r_out

# def fit_wrap_loop(size_t max_nodes):
# cdef size_t max_num_nodes = max_nodes
# # This is where the magic happens
# return (run_corels_loop(max_num_nodes, pmap, tree, queue) != -1)
#
# def fit_wrap_end(int early):
# global rules
# global labels_vecs
# global minor
# global n_rules
#
# cdef vector[int] rulelist
# cdef vector[int] classes
# run_corels_end(&rulelist, &classes, early, 0, NULL, NULL, NULL, pmap, tree,
# queue, init, run_verbosity)
#
# r_out = []
# print(rulelist.size())
# for i in range(rulelist.size()):
# if rulelist[i] < n_rules:
# r_out.append({})
# r_out[i]["antecedents"] = []
# for j in range(rules[rulelist[i]].cardinality):
# r_out[i]["antecedents"].append(rules[rulelist[i]].ids[j])
#
# r_out[i]["prediction"] = bool(classes[i])
#
# r_out.append({ "antecedents": [0], "prediction": bool(classes[rulelist.size()]) })
#
# # Exiting early skips cleanup
# if early == 0:
# if labels_vecs != NULL:
# _free_vector(labels_vecs, 2)
# if minor != NULL:
# _free_vector(minor, 1)
# if rules != NULL:
# _free_vector(rules, n_rules)
#
# minor = NULL
# rules = NULL
# labels_vecs = NULL
# n_rules = 0
#
# return r_out
Loading