pytorch

tensorflow

File I/O

# Glob pattern for the flower JPEGs; the directory component is used
# elsewhere in these notes as the class label.
data_path = 'gs://flowers-public/*/*.jpg'
labels = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']
# Number of files matching the pattern.
n_imgs = len(tf.io.gfile.glob(data_path))

# Build a dataset of the file paths matching the pattern.
filenames = tf.data.Dataset.list_files(data_path)

#### question 1, 2 ####
# Print the first 10 paths. The original looped over an undefined name
# (`fileset`); the dataset created above is `filenames`.
for filename in filenames.take(10):
  print(filename.numpy().decode('utf-8'))


#### question 3 ####
# Extract the image values
def decode_jpg(filename):
  """Read the file at `filename` and return it decoded as a JPEG image.

  Handles exactly one filename per call; use `Dataset.map` to apply it
  to every element of a dataset.
  """
  return tf.image.decode_jpeg(tf.io.read_file(filename))

# Dataset.map runs decode_jpg over every filename in the dataset,
# much like applying a function to each item of a Python list.
images = filenames.map(decode_jpg)

# Show the shape of the first 10 decoded images.
for image in images.take(10):
  shape = image.numpy().shape
  print(shape)


# #### question 4 ####
# Extract the image together with its label
def decode_jpg_label(filename):
  """Return `(image, label)` for the JPEG file at `filename`.

  The image is the decoded file content. The label is the
  second-to-last '/'-separated component of `filename`, i.e. the name
  of the directory that contains the file.
  """
  image = tf.image.decode_jpeg(tf.io.read_file(filename))
  # Wrap the filename in a length-1 vector before splitting, then read
  # the flat list of path components back through `.values`.
  # (presumably `filename` is a scalar string tensor -- confirm)
  path_parts = tf.strings.split(tf.expand_dims(filename, axis=-1), sep='/')
  return image, path_parts.values[-2]

# Pair every image with the label derived from its path.
dataset = filenames.map(decode_jpg_label)

# Show the shape and label text of the first 10 examples.
for image, label in dataset.take(10):
  label_text = label.numpy().decode('utf-8')
  print(image.numpy().shape, label_text)

keras

weight initializer, weight regularizer : layer에서 설정할 수 있음.

sparse: one_hot output을 y값(정수 레이블)에 맞게 변경시켜줌 (SparseCategoricalCrossentropy, SparseCategoricalAccuracy)

data 불러오기 : RAM이 차서 병목이 생김 - 안 좋음, 웬만하면 쓰지 말 것

sklearn

pass

LabelEncoder : 범주형 값을 숫자로 변환 (fit_transform), 숫자를 다시 원래 범주로 복원 (inverse_transform)

seaborn

pandas

# Summarize the date coverage and size of the train/test frames.
# Uses the print() function: the original Python 2 print statements are
# a SyntaxError under Python 3, which the rest of this file is written for.
# `train` and `test` are expected to have a datetime-like `Date` column
# (`.date()` and `.days` are called on its min/max).
print('Train min/max date: %s / %s' % (train.Date.min().date(), train.Date.max().date()))
print('Test  min/max date: %s / %s' % (test.Date.min().date(), test.Date.max().date()))
print()
# +1 so that both the first and the last day are counted.
print('Number of days in train: %d' % ((train.Date.max() - train.Date.min()).days + 1))
print('Number of days in test:  %d' % ((test.Date.max() - test.Date.min()).days + 1))
print()
print('Train shape: %d rows' % train.shape[0])
print('Test shape: %d rows' % test.shape[0])