-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataPartitioner.py
More file actions
68 lines (54 loc) · 1.6 KB
/
dataPartitioner.py
File metadata and controls
68 lines (54 loc) · 1.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
from tree import makeNode
from poolTree import PoolTree
import random
import os
# +======================= DIRECTORY CREATION
# One output folder per training-set size: folders[i] is paired with sizes[i]
# (via zip) when the training subsets are written further down the script.
print("Creando las carpetas", end='')
folders = ["500k", "50k", "5k"]
sizes = [500000, 50000, 5000]
for folderName in folders:
    path = "./data/" + folderName
    try:
        # exist_ok=True keeps a re-run from reporting a bogus failure when the
        # folder already exists; makedirs also creates ./data if it is missing.
        os.makedirs(path, exist_ok=True)
    except OSError:
        # Best effort: report the problem and carry on with the next folder.
        print ("La creación del directorio %s fallo" % path)
    else:
        print('.', end='')
# Output locations. evPath and compPath are complete file paths, whereas
# trainPath is a suffix appended to each "./data/<folder>" prefix.
evPath = "data/evaDataCover.npy"
compPath = "data/compDataCover.npy"
trainPath = "/trainingData.npy"
# +======================= DATA LOADING
# Parse the comma-separated source file into allData: a list holding one 1-D
# float array per non-blank line of the file.
print("Leyendo datos...")
sourceData = "data/covtype.data"
allData = []
with open(sourceData) as fl:
    for line in fl:
        # strip() drops the line terminator only if one is present, so the
        # last value is not truncated when the final line has no newline.
        line = line.strip()
        if not line:
            # A blank line holds no record; converting it to float would fail.
            continue
        row = line.split(",")
        allData.append(np.array(row).astype(float))
# +======================= SHUFFLING
print("Randomizando datos...")
random.shuffle(allData)
# +======================= DATA SPLIT
# Layout of the shuffled rows:
#   [0, 2*partitionSize)         interleaved hold-out rows: even positions go
#                                to the competition set, odd positions to the
#                                evaluation set (partitionSize rows each)
#   [2*partitionSize, fullSize)  pool the training subsets are cut from
print("Dividiendo los datos", end='')
# Use the number of rows actually loaded instead of a hard-coded count, so a
# shorter or longer source file neither raises IndexError nor drops rows.
fullSize = len(allData)
partitionSize = round(fullSize*0.2)
heldOutEnd = partitionSize*2
competitionData = allData[0:heldOutEnd:2]
evaluationData = allData[1:heldOutEnd:2]
np.save(evPath, evaluationData)
np.save(compPath, competitionData)
print('.',end='')
for folder, size in zip(folders, sizes):
    # Every training subset starts at the same offset, so the smaller subsets
    # are prefixes of the larger ones. The upper bound is clamped to fullSize:
    # a subset holds fewer than `size` rows when the pool is too small.
    minVal = min(fullSize, heldOutEnd+size)
    trainingData = allData[heldOutEnd:minVal]
    np.save("./data/"+folder+trainPath, trainingData)
    print('.',end='')
print()