39 int stack_size =
stack_.size();
40 for (
int i = 1; i < stack_size; ++i) {
52 bool parallel_debug =
false;
56 parallel_debug =
true;
59 int stack_size =
stack_.size();
64 for (
int i = 0; i < stack_size; ++i) {
68 #pragma omp parallel for num_threads(stack_size)
70 for (
int i = 0; i < stack_size; ++i) {
71 stack_[i]->Forward(debug, input,
nullptr, scratch, results[i]);
76 for (
int i = 0; i < stack_size; ++i) {
77 out_offset = output->
CopyPacking(*results[i], out_offset);
88 src_transpose = &transposed_input_;
92 for (
int i = 0; i < stack_size; ++i) {
93 stack_[i]->Forward(debug, input, src_transpose, scratch, result);
100 out_offset = output->
CopyPacking(*result, out_offset);
103 if (parallel_debug) {
119 int stack_size =
stack_.size();
126 int feature_offset = 0;
127 for (
int i = 0; i <
stack_.size(); ++i) {
128 int num_features =
stack_[i]->NumOutputs();
129 in_deltas[i].Resize(fwd_deltas, num_features, scratch);
131 in_deltas[i]->CopyUnpacking(fwd_deltas, feature_offset, num_features);
132 feature_offset += num_features;
135 #pragma omp parallel for num_threads(stack_size)
137 for (
int i = 0; i < stack_size; ++i) {
138 stack_[i]->Backward(debug, *in_deltas[i], scratch,
139 i == 0 ? back_deltas : out_deltas[i]);
142 for (
int i = 1; i < stack_size; ++i) {
152 int feature_offset = 0;
153 for (
int i = 0; i <
stack_.size(); ++i) {
154 int num_features =
stack_[i]->NumOutputs();
155 in_deltas->
CopyUnpacking(fwd_deltas, feature_offset, num_features);
156 feature_offset += num_features;
157 if (
stack_[i]->
Backward(debug, *in_deltas, scratch, back_deltas)) {
161 out_deltas->
CopyAll(*back_deltas);