-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerator.cpp
More file actions
111 lines (97 loc) · 3.91 KB
/
generator.cpp
File metadata and controls
111 lines (97 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
// generator.cpp
// filesystem requires C++17
#include <math.h>

#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <random>
#include <string>
#include <system_error>
#include <vector>
// Shopping session: the features recorded for one visit plus its purchase label.
// Every member has a default so that a default-constructed Session holds
// well-defined values instead of indeterminate ones.
struct Session
{
    int administrative = 0;  // Account-settings (administrative) page count
    int product = 0;         // Pages with products
    int information = 0;     // Information pages
    double bounceRate = 0.0; // Share of visitors that leave after one page
    double exitRate = 0.0;   // Exit rate; NOTE(review): original comment was cut off - confirm exact definition
    double pageValue = 0.0;  // Page value score for the session
    std::string visitorType; // Returning, new, or cookies disabled (empty until set)
    bool weekend = false;    // True if weekend, false if weekday
    bool purchase = false;   // Did they make a purchase
};
std::vector<Session> generateSessions(int n)
{
std::vector<Session> data;
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<> adminDist(0, 5);
std::uniform_int_distribution<> prodDist(0, 20);
std::uniform_int_distribution<> infoDist(0, 10);
std::uniform_real_distribution<> rateDist(0.0, 1.0);
std::uniform_real_distribution<> dist(0.0, 1.0);
std::exponential_distribution<> pageValDist(0.1);
std::bernoulli_distribution weekendDist(0.3);
std::bernoulli_distribution visitorDist(0.7); // 70% returning
for (int i = 0; i < n; ++i)
{
Session s;
s.administrative = adminDist(gen);
s.product = prodDist(gen);
s.information = infoDist(gen);
s.bounceRate = rateDist(gen);
s.exitRate = rateDist(gen);
s.pageValue = pageValDist(gen) * 5;
s.visitorType = visitorDist(gen) ? "Returning_Visitor" : "New_Visitor";
s.weekend = weekendDist(gen);
// Start with a base probability
double purchaseProb = 0.0;
// Feature Influence
purchaseProb += s.pageValue / 400.0; // Higher page value helps
purchaseProb += s.product * 0.01; // More product pages helps
purchaseProb += s.information * 0.01; // Info helps slightly
purchaseProb -= s.bounceRate * 0.3; // High bounce hurts
purchaseProb -= s.exitRate * 0.2; // High exit hurts
purchaseProb += (s.visitorType == "Returning_Visitor") ? 0.30 : 0.0;
purchaseProb += s.weekend ? 0.05 : 0.0; // Slight weekend boost
// Keeping it between 0 and 1
purchaseProb = std::min(purchaseProb, 1.0);
purchaseProb = std::max(purchaseProb, 0.0);
// Inject randomness
std::bernoulli_distribution rareWin(0.02); // 2% chance of random yes
std::bernoulli_distribution rareFail(0.05); // 5% chance of random no
std::bernoulli_distribution purchaseDist(purchaseProb);
// This is to randomly have it be yes or randomly be no
double r = dist(gen);
if (r < 0.02)
s.purchase = true; // 2% random yes
else if (r > 0.95)
s.purchase = false; // 5% random no
else
s.purchase = (r < purchaseProb);
data.push_back(s);
}
return data;
}
// Writes `data` to `filename` as CSV: one header row, then one row per session.
//
// The file is created or truncated. Booleans (Weekend, Purchase) are written
// with the stream's default formatting, i.e. as 1 / 0. Fields are not quoted
// or escaped.
//
// On failure to open the file or to complete the write, an error is printed to
// std::cerr and the function returns; it does not throw.
void writeSessionsToCSV(const std::vector<Session> &data, const std::string &filename)
{
    std::ofstream file(filename);
    if (!file)
    {
        std::cerr << "Error: could not open '" << filename << "' for writing\n";
        return;
    }

    file << "Administrative,Product,Information,BounceRate,ExitRate,PageValue,VisitorType,Weekend,Purchase\n";
    for (const auto &s : data)
    {
        file << s.administrative << ',' << s.product << ',' << s.information << ','
             << s.bounceRate << ',' << s.exitRate << ',' << s.pageValue << ','
             << s.visitorType << ',' << s.weekend << ',' << s.purchase << '\n';
    }

    // Flush explicitly so a late write error (e.g. disk full) is detected here
    // rather than silently dropped when the stream is destroyed.
    file.flush();
    if (!file)
        std::cerr << "Error: failed while writing '" << filename << "'\n";
}
// Entry point: generates a 1000-row training set and a 300-row "actual" set
// and writes them as CSV files under Data_Input/, creating the directory if
// needed. Returns 0 on success, 1 if the output directory cannot be created.
int main()
{
    const std::vector<Session> trainingData = generateSessions(1000);
    const std::vector<Session> actualData = generateSessions(300);

    const std::string output = "Data_Input/";

    // Use the non-throwing overload so a permissions or path problem yields a
    // readable message and exit code instead of an uncaught exception.
    std::error_code ec;
    std::filesystem::create_directories(output, ec);
    if (ec)
    {
        std::cerr << "Error: could not create directory '" << output << "': " << ec.message() << '\n';
        return 1;
    }

    // Training Data
    writeSessionsToCSV(trainingData, output + "shoppers_train.csv");

    // Actual Results
    writeSessionsToCSV(actualData, output + "shoppers_actual.csv");

    return 0;
}