Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## DeepFuzz

* run.sh is used to crawl training data from the GCC test suite.
* pre.py is used to prepare training data.
* prepare.py is used to prepare training data.
* train.py is used to train a sequence-to-sequence (s2s) model.
* generate.py is used to generate new programs with the trained model.
5 changes: 3 additions & 2 deletions generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pdb
import os
import preprocess as pp
import random

latent_dim = 512 # Latent dimensionality of the encoding space.
num_samples = 2000000 # Number of samples to train on.
Expand Down Expand Up @@ -53,7 +54,7 @@
input_token_index = dict(
[(char, i) for i, char in enumerate(input_characters)])
target_token_index = dict(
[(char, i) for i, char in enumerate(target_characqaters)])
[(char, i) for i, char in enumerate(target_characters)])

encoder_input_data = np.zeros(
(len(input_texts), max_encoder_seq_length, num_encoder_tokens),
Expand Down Expand Up @@ -296,7 +297,7 @@ def generate():
continue
total_count += 1
text = synthesis(text, 'g1', 'nosample')
is_valid = verify_correctness(text, file, 'deepfuzz_g1_nosample')
is_valid = pp.verify_correctness(text, file, 'deepfuzz_g1_nosample')
if (is_valid):
syntax_valid_count += 1
except:
Expand Down
39 changes: 0 additions & 39 deletions pre.py

This file was deleted.

10 changes: 5 additions & 5 deletions prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ def generate_training_data(text):
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen - 1):
sentences.append(text[i: i + maxlen])
next_chars.append(text[i + maxlen])
sentences[i] = re.sub(r'[\n\t]',' ', sentences[i])
next_chars[i] = re.sub(r'[\n\t]',' ', next_chars[i])
print(sentences[i] + "\t" + next_chars[i])
sentences.append(text[i: i + maxlen])
next_chars.append(text[i + maxlen])
sentences[i] = re.sub(r'[\n\t]',' ', sentences[i])
next_chars[i] = re.sub(r'[\n\t]',' ', next_chars[i])
print(sentences[i] + "\t" + next_chars[i], file=open('pair', 'a+', encoding='iso-8859-1'))

path = './gcc/gcc/testsuite'
files = []
Expand Down