# Source: singa/examples/char-rnn/sample.py at e906b84dbba902a1edd4cd10e7e8534b0846ab39 (apache/singa, GitHub)
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
'''Sample characters from the pre-trained model'''
import sys
import os
import cPickle as pickle
import numpy as np
import argparse

sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
from singa import layer
from singa import tensor
from singa import device
from singa.proto import model_pb2


def sample(model_path, nsamples=100, seed_text='', do_sample=True):
    '''Generate characters from a pickled char-rnn checkpoint onto stdout.

    The checkpoint is unpickled, an LSTM layer and a dense layer are rebuilt
    on a CUDA device and loaded with the stored weights, and characters are
    then produced one at a time, each one being fed back as the next input.

    Args:
        model_path: path of the pickled checkpoint. The code reads the keys
            'rnn_w', 'dense_w', 'dense_b', 'idx_to_char', 'char_to_idx',
            'hidden_size', 'num_stacks' and 'dropout' from it.
        nsamples: number of characters to generate (after the seed text).
        seed_text: optional string run through the network first to warm up
            the hidden/cell states; it is echoed to stdout before the
            generated characters. Every character is looked up in the
            checkpoint's 'char_to_idx', so an unknown character makes that
            lookup fail.
        do_sample: if True, draw each character at random from the predicted
            distribution; otherwise always take the most probable character.

    Returns:
        None. The text is written to sys.stdout, followed by a newline.
    '''
    # NOTE(security): unpickling can execute arbitrary code; only load
    # checkpoints that come from a trusted source.
    with open(model_path, 'rb') as fd:
        ckpt = pickle.load(fd)

    idx_to_char = ckpt['idx_to_char']
    char_to_idx = ckpt['char_to_idx']
    vocab_size = len(idx_to_char)
    hidden_size = ckpt['hidden_size']
    num_stacks = ckpt['num_stacks']
    dropout = ckpt['dropout']
    rnn_w = tensor.from_numpy(ckpt['rnn_w'])
    dense_w = tensor.from_numpy(ckpt['dense_w'])
    dense_b = tensor.from_numpy(ckpt['dense_b'])

    cuda = device.create_cuda_gpu()

    # Rebuild both layers on the device and load the stored parameters.
    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,
                     num_stacks=num_stacks, dropout=dropout,
                     input_sample_shape=(vocab_size,))
    rnn.to_device(cuda)
    rnn.param_values()[0].copy_data(rnn_w)

    dense = layer.Dense('dense', vocab_size,
                        input_sample_shape=(hidden_size,))
    dense.to_device(cuda)
    dense.param_values()[0].copy_data(dense_w)
    dense.param_values()[1].copy_data(dense_b)

    # Recurrent states start at zero.
    hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    hx.set_value(0.0)
    cx.set_value(0.0)

    def step(char_idx, hx, cx):
        '''Feed one character index; return (softmax output, new hx, new cx).'''
        x = np.zeros((1, vocab_size), dtype=np.float32)
        x[0, char_idx] = 1  # one-hot encoding of the input character
        tx = tensor.from_numpy(x)
        tx.to_device(cuda)
        outputs = rnn.forward(model_pb2.kEval, [tx, hx, cx])
        y = tensor.softmax(dense.forward(model_pb2.kEval, outputs[0]))
        return y, outputs[1], outputs[2]

    if seed_text:
        # Warm up the states; only the prediction after the last seed
        # character is used to pick the first generated character.
        for c in seed_text:
            y, hx, cx = step(char_to_idx[c], hx, cx)
        sys.stdout.write(seed_text)
    else:
        # No seed: start from a uniform distribution over the vocabulary.
        y = tensor.Tensor((1, vocab_size), cuda)
        y.set_value(1.0 / vocab_size)

    for _ in range(nsamples):
        y.to_host()
        prob = tensor.to_numpy(y)[0]
        if do_sample:
            # The softmax is computed in float32, so its sum can drift from 1
            # by more than np.random.choice tolerates; renormalize in float64
            # to avoid a spurious "probabilities do not sum to 1" ValueError.
            prob = prob.astype(np.float64)
            prob /= prob.sum()
            cur = np.random.choice(vocab_size, 1, p=prob)[0]
        else:
            cur = np.argmax(prob)
        sys.stdout.write(idx_to_char[cur])
        # The character just emitted becomes the next input.
        y, hx, cx = step(cur, hx, cx)

    # Terminate the output line (works on both Python 2 and Python 3).
    sys.stdout.write('\n')

if __name__ == '__main__':
    # Command-line entry point: sample `n` characters from the checkpoint
    # stored in 'model.bin', optionally warming up the network with --seed.
    parser = argparse.ArgumentParser(description='sample chars from char-rnn')
    parser.add_argument('--seed', default='',
                        help='seed text string which warms up the rnn'
                             ' states for sampling')
    parser.add_argument('n', type=int, help='num of characters to sample')
    args = parser.parse_args()
    # Validate explicitly instead of with `assert`, which is stripped when
    # Python runs with -O; parser.error prints the usage text and exits.
    if args.n <= 0:
        parser.error('n must > 0')
    sample('model.bin', args.n, seed_text=args.seed)