Skip to content

Commit cc6a2f4

Browse files
author
yanghaoqi
committed
add warpctc caffe layer
1 parent 9b89154 commit cc6a2f4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+7543
-171
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,12 @@ include(cmake/Summary.cmake)
2727
include(cmake/ConfigGen.cmake)
2828

2929
# ---[ Options
30-
caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA
30+
caffe_option(CPU_ONLY "Build Caffe without CUDA support" ON) # TODO: rename to USE_CUDA
3131
caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY)
3232
caffe_option(USE_NCCL "Build Caffe with NCCL library support" OFF)
3333
caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
3434
caffe_option(BUILD_python "Build Python wrapper" ON)
35-
set(python_version "2" CACHE STRING "Specify which Python version to use")
35+
set(python_version "3" CACHE STRING "Specify which Python version to use")
3636
caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE)
3737
caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE)
3838
caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON)

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,14 @@ ifeq ($(USE_HDF5), 1)
198198
LIBRARIES += hdf5_hl hdf5
199199
endif
200200
ifeq ($(USE_OPENCV), 1)
201-
LIBRARIES += opencv_core opencv_highgui opencv_imgproc
201+
LIBRARIES += opencv_core opencv_highgui opencv_imgproc opencv_imgcodecs
202202

203203
ifeq ($(OPENCV_VERSION), 3)
204204
LIBRARIES += opencv_imgcodecs
205205
endif
206206

207207
endif
208-
PYTHON_LIBRARIES ?= boost_python python2.7
208+
PYTHON_LIBRARIES ?= boost_python python3.8
209209
WARNINGS := -Wall -Wno-sign-compare
210210

211211
##############################

cmake/Cuda.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ function(detect_cuDNN)
198198
set(HAVE_CUDNN TRUE PARENT_SCOPE)
199199
set(CUDNN_FOUND TRUE PARENT_SCOPE)
200200

201-
file(READ ${CUDNN_INCLUDE}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
201+
file(READ ${CUDNN_INCLUDE}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
202202

203203
# cuDNN v3 and beyond
204204
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/******************************************************************************
2+
* Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are met:
6+
* * Redistributions of source code must retain the above copyright
7+
* notice, this list of conditions and the following disclaimer.
8+
* * Redistributions in binary form must reproduce the above copyright
9+
* notice, this list of conditions and the following disclaimer in the
10+
* documentation and/or other materials provided with the distribution.
11+
* * Neither the name of the NVIDIA CORPORATION nor the
12+
* names of its contributors may be used to endorse or promote products
13+
* derived from this software without specific prior written permission.
14+
*
15+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18+
* ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19+
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
*
26+
******************************************************************************/
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/******************************************************************************
2+
* Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are met:
6+
* * Redistributions of source code must retain the above copyright
7+
* notice, this list of conditions and the following disclaimer.
8+
* * Redistributions in binary form must reproduce the above copyright
9+
* notice, this list of conditions and the following disclaimer in the
10+
* documentation and/or other materials provided with the distribution.
11+
* * Neither the name of the NVIDIA CORPORATION nor the
12+
* names of its contributors may be used to endorse or promote products
13+
* derived from this software without specific prior written permission.
14+
*
15+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18+
* ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19+
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
*
26+
******************************************************************************/
27+
28+
/******************************************************************************
29+
*
30+
* Code and text by Sean Baxter, NVIDIA Research
31+
* See http://nvlabs.github.io/moderngpu for repository and documentation.
32+
*
33+
******************************************************************************/
34+
35+
#pragma once
36+
37+
#include "ctasearch.cuh"
38+
#include "loadstore.cuh"
39+
40+
namespace mgpu {
41+
42+
////////////////////////////////////////////////////////////////////////////////
43+
// DeviceSerialLoadBalanceSearch
44+
// Upper Bound search from A (needles) into B (haystack). The A values are
45+
// natural numbers from aBegin to aEnd. bFirst is the index of the B value at
46+
// bBegin in shared memory.
47+
48+
// Per-thread serial search step. Runs exactly VT iterations; each iteration
// either writes one entry of a_shared (and advances the A cursor) or advances
// the B cursor and reloads the current B key.
//
// Template parameters:
//   VT         - number of loop iterations performed by the calling thread.
//   RangeCheck - when true, every step also tests aBegin against aEnd and
//                bBegin against bEnd. When false, only aBegin < bKey is
//                tested and aEnd / bEnd are never read, so the caller is
//                responsible for keeping both cursors in range
//                (presumably via sentinel keys after the last B value -
//                TODO confirm against callers).
// Parameters:
//   b_shared - array of B keys, indexed by the B cursor.
//   aBegin   - first A value handled here; also used directly as the index
//              into a_shared.
//   aEnd     - one past the last A value (only consulted if RangeCheck).
//   bFirst   - offset added to the B cursor to form the value stored per A.
//   bBegin   - starting B cursor (index into b_shared).
//   bEnd     - end of the B range (only consulted if RangeCheck).
//   a_shared - output array; entry [a] receives bFirst + (B cursor) at the
//              moment A value a is consumed.
//
// NOTE: b_shared[bBegin] is read unconditionally before the loop, and
// b_shared[++bBegin] is read without a bounds test whenever B advances, so
// those slots must be readable even when B is exhausted.
template<int VT, bool RangeCheck>
49+
MGPU_DEVICE void DeviceSerialLoadBalanceSearch(const int* b_shared, int aBegin,
50+
int aEnd, int bFirst, int bBegin, int bEnd, int* a_shared) {
51+
52+
// Cache the current B key; it is re-read only when the B cursor advances.
int bKey = b_shared[bBegin];
53+
54+
// Request unrolling of the fixed VT-iteration loop.
#pragma unroll
55+
for(int i = 0; i < VT; ++i) {
56+
// p == true: consume the current A value. p == false: step to the next B key.
bool p;
57+
if(RangeCheck)
58+
// Consume A only while A values remain, and either B is exhausted or the
// current A value is strictly below the current B key.
p = (aBegin < aEnd) && ((bBegin >= bEnd) || (aBegin < bKey));
59+
else
60+
// Unchecked variant: relies solely on the key comparison.
p = aBegin < bKey;
61+
62+
if(p)
63+
// Advance A (the needle).
64+
// Store bFirst + current B cursor at index aBegin, then move to the next A.
a_shared[aBegin++] = bFirst + bBegin;
65+
else
66+
// Advance B (the haystack).
67+
bKey = b_shared[++bBegin];
68+
}
69+
}
70+
71+
////////////////////////////////////////////////////////////////////////////////
72+
// CTALoadBalance
73+
// Computes upper_bound(counting_iterator<int>(first), b_global) - 1.
74+
75+
// Unlike most other CTA* functions, CTALoadBalance loads from global memory.
76+
// The loaded B elements are stored in shared memory after the A slots, with
77+
// one extra leading slot for the preceding B term when loadPrecedingB is set.
78+
79+
// CTALoadBalance requires NT * VT + 2 slots of shared memory.
80+
// Block-level load-balancing search over one tile of NT * VT items.
//
// Steps performed, in order:
//   1. Obtain this tile's A range [a0, a1) and B range [b0, b1) from
//      ComputeMergeRange.
//   2. Copy the tile's B values from b_global into indices_shared (after the
//      aCount slots reserved for A results), optionally with one preceding
//      and one trailing B term.
//   3. Pad the slots after the loaded B values with destCount.
//   4. Use MergePath to find each thread's starting position, then run
//      DeviceSerialLoadBalanceSearch<VT, false> to fill the A results.
//
// Template parameters:
//   NT      - stride of the per-thread copy/fill loops (threads per block,
//             presumably - TODO confirm against the launch configuration).
//   VT      - items handled per thread; thread tid starts at diagonal VT * tid.
//   InputIt - type of b_global; only operator[] is used here.
// Parameters:
//   destCount      - forwarded to ComputeMergeRange and also used as the
//                    padding value written after the loaded B values.
//   b_global       - source of B values, read at index b0 + i.
//   sourceCount    - forwarded to ComputeMergeRange; b1 < sourceCount decides
//                    whether one trailing B term is loaded.
//   block          - forwarded to ComputeMergeRange.
//   tid            - calling thread's index; start offset of the strided loops.
//   mp_global      - forwarded to ComputeMergeRange (presumably precomputed
//                    partition points - TODO confirm).
//   indices_shared - scratch buffer: entries [0, aCount) receive the A
//                    results, the B values follow.
//   loadPrecedingB - request that b_global[b0 - 1] also be loaded, into the
//                    slot just before b_shared[0]; ignored when b0 == 0.
// Returns:
//   int4(a0, a1, b0, b1), where b0 has been decremented by one if the
//   preceding B term was loaded.
template<int NT, int VT, typename InputIt>
81+
MGPU_DEVICE int4 CTALoadBalance(int destCount, InputIt b_global,
82+
int sourceCount, int block, int tid, const int* mp_global,
83+
int* indices_shared, bool loadPrecedingB) {
84+
85+
int4 range = ComputeMergeRange(destCount, sourceCount, block, 0, NT * VT,
86+
mp_global);
87+
88+
// Unpack the tile's A interval [a0, a1) and B interval [b0, b1).
int a0 = range.x;
89+
int a1 = range.y;
90+
int b0 = range.z;
91+
int b1 = range.w;
92+
// There is no B element before index 0, so drop the preceding-term request.
if(!b0) loadPrecedingB = false;
93+
94+
// Load one trailing term from B. If we're already at the end, fill the
95+
// end of the buffer with destCount.
96+
int aCount = a1 - a0;
97+
int bCount = b1 - b0;
98+
// 1 when a B element exists past this tile's range, else 0.
int extended = b1 < sourceCount;
99+
int loadCount = bCount + extended;
100+
// Exclusive upper bound for the padding loop below, which writes
// b_shared[bCount + i] for i in [extended, fillCount).
int fillCount = NT * VT + 1 - loadCount - aCount;
101+
102+
// A results occupy the first aCount entries; B values start right after
// them, shifted by one extra slot when the preceding B term is loaded.
int* a_shared = indices_shared;
103+
int* b_shared = indices_shared + aCount + (int)loadPrecedingB;
104+
105+
// Load the B values.
// The commented-out call below is not compiled; the explicit strided loop
// that follows performs the load.
106+
// DeviceMemToMemLoop<NT>(bCount + extended + (int)loadPrecedingB,
107+
// b_global + b0 - (int)loadPrecedingB, tid,
108+
// b_shared - (int)loadPrecedingB);
109+
110+
// Each thread starts at tid (tid - 1 when loadPrecedingB, so the thread with
// tid == 0 stores b_global[b0 - 1] into b_shared[-1]) and strides by NT.
for(int i = tid - (int)loadPrecedingB; i < bCount + extended; i += NT)
111+
b_shared[i] = b_global[b0 + i];
112+
113+
// Fill the end of the array with destCount.
114+
// Starting at tid + extended leaves the trailing B term (if loaded) intact.
for(int i = tid + extended; i < fillCount; i += NT)
115+
b_shared[bCount + i] = destCount;
116+
// Barrier: all B values and padding must be written before any thread searches.
__syncthreads();
117+
118+
// Run a merge path to find the start of the serial merge for each thread.
119+
int diag = VT * tid;
120+
int mp = MergePath<MgpuBoundsUpper>(mgpu::counting_iterator<int>(a0),
121+
aCount, b_shared, bCount, diag, mgpu::less<int>());
122+
123+
// mp is this thread's offset into A; the remainder of the diagonal is its
// offset into B.
int a0tid = a0 + mp;
124+
int b0tid = diag - mp;
125+
126+
// Subtract 1 from b0 because we want to return upper_bound - 1.
// a_shared - a0 rebases the output pointer because the callee indexes it by
// absolute A value. RangeCheck is false here (presumably the destCount
// padding above serves as the sentinel - TODO confirm).
127+
DeviceSerialLoadBalanceSearch<VT, false>(b_shared, a0tid, a1, b0 - 1,
128+
b0tid, bCount, a_shared - a0);
129+
// Barrier: every thread's a_shared entries are written before returning.
__syncthreads();
130+
131+
// Widen the reported B range to include the preceding term if it was loaded.
b0 -= (int)loadPrecedingB;
132+
return make_int4(a0, a1, b0, b1);
133+
}
134+
135+
136+
} // namespace mgpu

0 commit comments

Comments
 (0)