Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
*.out
*.log
__pycache__
*.sw[p,o]
6 changes: 3 additions & 3 deletions kw/boot/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
# Builds the PULP bootloader without the run time. We need a separate link
# script since the code is located at a different static location (boot ROM).

PULP_APP = boot-oprecompkw
PULP_APP = rom-oprecompkw
PULP_APP_FC_SRCS = boot.c
PULP_APP_FC_ASM_SRCS = crt0.S
PULP_CFLAGS = -O3 -g -fno-jump-tables
PULP_LDFLAGS = -Tlink.ld -nostdlib
CONFIG_OPT = options/rt/no-crt0=true options/rt/no-link-script=true options/rt/no-rt=true
PULP_LDFLAGS = -Tlink.ld -nostdlib -lgcc
CONFIG_OPT = rt/no-crt0=true rt/no-link-script=true rt/no-rt=true

include $(PULP_SDK_HOME)/install/rules/pulp_rt.mk
2 changes: 1 addition & 1 deletion kw/boot/boot.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ inline uint64_t host2local(uint64_t host) {


// Rudimentary putc/puts for output.
#define boot_putc_address (STDOUT_BASE_ADDR + STDOUT_PUTC_OFFSET + (hal_core_id()<<3) + (hal_cluster_id()<<7))
#define boot_putc_address (ARCHI_STDOUT_ADDR + STDOUT_PUTC_OFFSET + (hal_core_id()<<3) + (hal_cluster_id()<<7))

static void boot_putc(unsigned enable, unsigned int c) {
if (enable) {
Expand Down
2 changes: 1 addition & 1 deletion kw/boot/link.ld
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ __DYNAMIC = 0;
MEMORY
{
ROM : ORIGIN = 0x1A000000, LENGTH = 0x2000
L1 : ORIGIN = 0x10000000, LENGTH = 0x10000
L1 : ORIGIN = 0x10000004, LENGTH = 0xFFFC
L2 : ORIGIN = 0x1C000000, LENGTH = 0x4000
}

Expand Down
6 changes: 3 additions & 3 deletions kw/libcxl/libcxl.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <pthread.h>

#include "libcxl.h"
#include "gv/gv_launcher.h"
#include "vp/launcher.h"


struct cxl_afu_h {
Expand Down Expand Up @@ -119,14 +119,14 @@ afu_open(struct cxl_afu_h *afu) {
}

// Configure the handles into the platform.
afu->capi_binding = gv_ioreq_binding(afu->gv, "soc/soc_ico", (void *)((uint64_t)1 << 48), (uint64_t)1 << 48, capi_callback, afu);
afu->capi_binding = gv_ioreq_binding(afu->gv, "soc/host_injector", (void *)((uint64_t)1 << 48), (uint64_t)1 << 48, capi_callback, afu);
// afu->capi_binding = gv_ioreq_binding(afu->gv, "soc/soc_ico", (void *)0x1a600000, 0x00100000, capi_callback, afu);
if (afu->capi_binding == NULL) {
fprintf(stderr, "libcxl error: unable to establish CAPI binding (gv_ioreq_binding)\n");
goto fail;
}

afu->wed_binding = gv_ioreq_binding(afu->gv, "job_fifo", NULL, 0, NULL, NULL);
afu->wed_binding = gv_ioreq_binding(afu->gv, "job_fifo_injector", NULL, 0, NULL, NULL);
if (afu->wed_binding == NULL) {
fprintf(stderr, "libcxl error: unable to establish WED FIFO binding (gv_ioreq_binding)\n");
goto fail;
Expand Down
3 changes: 2 additions & 1 deletion kw/liboprecomp/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ install:
install -D build/liboprecomp.a $(PULP_SDK_HOME)/install/ws/lib
install -D build/liboprecomp.so $(PULP_SDK_HOME)/install/ws/lib
install -D liboprecomp.h $(PULP_SDK_HOME)/install/ws/include
plpinfo tree --config-options="gvsoc/load-binary_eval=os.environ.get('PULP_SDK_HOME') + '/install/bin/boot-oprecompkw'" > $(PULP_SDK_HOME)/install/cfg/oprecompkw_config.json
mkdir -p $(PULP_SDK_HOME)/install/cfg
plpinfo tree --config-options="plt_loader/load-binary_eval=os.environ.get('PULP_SDK_HOME') + '/install/bin/rom-oprecompkw'" --config-options="soc/cluster/pe0/fetch_enable=true" --config-options="chip/boot_from_rom=true" --config-options="loader/boot/mode=rom" > $(PULP_SDK_HOME)/install/cfg/oprecompkw_config.json
2 changes: 1 addition & 1 deletion kw/mb/blstm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ all:
$(MAKE) -C host

run: all
host/build/blstm pulp/build/system.oprecompkw/blstm/blstm
host/build/blstm pulp/build/oprecompkw/blstm/blstm
2 changes: 1 addition & 1 deletion kw/mb/blstm/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ CFLAGS ?= -O3 -g -Wall -Wextra -std=c99
CPPFLAGS ?= -O3 -g -Wall -Wextra -std=c++11 -fopenmp
CFLAGS += -I$(PULP_SDK_HOME)/install/ws/include
CPPFLAGS += -I$(PULP_SDK_HOME)/install/ws/include -I./include
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lgvlauncher -lpthread
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lpulpvplauncher -lpthread

all: clean build/neuron build/blstm

Expand Down
2 changes: 1 addition & 1 deletion kw/samples/nop/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ all:
$(MAKE) -C host

run: all
host/build/nop pulp/build/system.oprecompkw/nop/nop
host/build/nop pulp/build/oprecompkw/nop/nop
2 changes: 1 addition & 1 deletion kw/samples/nop/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

CFLAGS ?= -O3 -g -Wall -Wextra
CFLAGS += -I$(PULP_SDK_HOME)/install/ws/include
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lgvlauncher -lpthread
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lpulpvplauncher -lpthread

all:: build/nop

Expand Down
25 changes: 25 additions & 0 deletions kw/samples/nop/testset.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from plptest import *

TestConfig = c = {}

def check_output(config, output):
    """Check that the captured test output contains the expected greeting.

    Args:
        config: Passed in by the caller; not used by this check.
        output: Captured output text to search.

    Returns:
        ``(True, None)`` when the expected string is present, otherwise
        ``(False, <reason>)``.
    """
    if 'Hello from the loaded PULP binary!' not in output:
        return (False, "Didn't find output string")

    return (True, None)


def get_test():
    """Return the plptest ``Test`` object describing the 'nop' sample."""
    # Two steps: a shell command, followed by the check_output check.
    steps = [
        Shell('build', 'make all run'),
        Check('check', check_output),
    ]
    return Test(name='nop', commands=steps, timeout=1000000000)

c['tests'] = [ ]

c['tests'].append(get_test())
2 changes: 1 addition & 1 deletion kw/samples/square/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ all:
$(MAKE) -C host

run: all
host/build/square pulp/build/system.oprecompkw/square/square
host/build/square pulp/build/oprecompkw/square/square
2 changes: 1 addition & 1 deletion kw/samples/square/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

CFLAGS ?= -O3 -g -Wall -Wextra -std=c99
CFLAGS += -I$(PULP_SDK_HOME)/install/ws/include
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lgvlauncher -lpthread
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lpulpvplauncher -lpthread

all:: build/square

Expand Down
25 changes: 25 additions & 0 deletions kw/samples/square/testset.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from plptest import *

TestConfig = c = {}

def check_output(config, output):
    """Check that the captured test output reports an all-correct result.

    Args:
        config: Passed in by the caller; not used by this check.
        output: Captured output text to search.

    Returns:
        ``(True, None)`` when the string ``'all correct'`` is present,
        otherwise ``(False, <reason>)``.
    """
    if 'all correct' not in output:
        return (False, "Didn't find output string")

    return (True, None)


def get_test():
    """Return the plptest ``Test`` object describing the 'square' sample."""
    # Two steps: a shell command, followed by the check_output check.
    steps = [
        Shell('build', 'make all run'),
        Check('check', check_output),
    ]
    return Test(name='square', commands=steps, timeout=1000000)

c['tests'] = [ ]

c['tests'].append(get_test())
2 changes: 1 addition & 1 deletion mb/cnn/kw/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ all:
$(MAKE) -C host

run: all
host/build/conv pulp/build/system.oprecompkw/conv/conv
host/build/conv pulp/build/oprecompkw/conv/conv
2 changes: 1 addition & 1 deletion mb/cnn/kw/host/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

CFLAGS ?= -O3 -g -Wall -Wextra -std=c99
CFLAGS += -I$(PULP_SDK_HOME)/install/ws/include
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lgvlauncher -lpthread
LDFLAGS ?= -L$(PULP_SDK_HOME)/install/ws/lib -loprecomp -lcxl -lpulpvplauncher -lpthread

all:: build/conv

Expand Down
28 changes: 21 additions & 7 deletions mb/cnn/kw/pulp/pulp.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ int main(uint64_t wedptr) {

// Determine the tiling of the input data via a binary search. This is quite
// inefficient and should be improved.
const uint32_t max_size = 65536;
const uint32_t max_size = 65536 - 0x5000;
const uint32_t max_volume = (max_size - (16 << 10)) / 4;
printf("Determining Tiling\n");
printf(" max_volume = %lu\n", max_volume);
Expand Down Expand Up @@ -96,6 +96,11 @@ int main(uint64_t wedptr) {
float *tile_x = rt_alloc(RT_ALLOC_CL_DATA, bufsz_x);
float *tile_w = rt_alloc(RT_ALLOC_CL_DATA, bufsz_w);
float *tile_y = rt_alloc(RT_ALLOC_CL_DATA, bufsz_y);
if (tile_x == NULL || tile_y == NULL || tile_w == NULL)
{
printf("Failed to allocate memory\n");
return -1;
}

// Perform the outer iterations.
for (int32_t ko1 = 0; ko1 < wed.KO; ko1 += TKO) {
Expand Down Expand Up @@ -123,25 +128,31 @@ int main(uint64_t wedptr) {
const int32_t XM1 = m_hi-m_lo;

printf("Loading x tile ki1=%lu..%lu, n=%lu..%lu (%ld), m=%lu..%lu (%ld)\n", ki1, ki1+KI1, n_lo, n_hi, XN1, m_lo, m_hi, XM1);
int merge = 0;
for (int32_t ki2 = 0; ki2 < KI1; ++ki2) {
for (int32_t n2 = 0; n2 < XN1; ++n2) {
plp_dma_memcpy(
plp_dma_memcpy_merge(
host2local(wed.in_x + (ki1+ki2)*wed.N*wed.M*4 + (n2+n_lo)*wed.M*4 + m_lo*4),
(uint32_t)tile_x + ki2*XN1*XM1*4 + n2*XM1*4,
XM1*4,
PLP_DMA_EXT2LOC
PLP_DMA_EXT2LOC,
merge
);
merge = 1;
}
}
plp_dma_barrier();
merge = 0;
for (int32_t ko2 = 0; ko2 < KO1; ++ko2) {
for (int32_t ki2 = 0; ki2 < KI1; ++ki2) {
plp_dma_memcpy(
plp_dma_memcpy_merge(
host2local(wed.in_w + (ko1+ko2)*wed.KI*wed.U*wed.V*4 + (ki1+ki2)*wed.U*wed.V*4),
(uint32_t)tile_w + ko2*TKI*wed.U*wed.V*4 + ki2*wed.U*wed.V*4,
wed.U*wed.V*4,
PLP_DMA_EXT2LOC
PLP_DMA_EXT2LOC,
merge
);
merge = 1;
}
}
plp_dma_barrier();
Expand Down Expand Up @@ -171,14 +182,17 @@ int main(uint64_t wedptr) {
}

// Write result tile back.
int merge = 0;
for (int32_t ko2 = 0; ko2 < KO1; ++ko2) {
for (int32_t n2 = 0; n2 < N1; ++n2) {
plp_dma_memcpy(
plp_dma_memcpy_merge(
host2local(wed.out_y + (ko1+ko2)*wed.N*wed.M*4 + (n1+n2)*wed.M*4 + m1*4),
(uint32_t)tile_y + ko2*N1*M1*4 + n2*M1*4,
M1*4,
PLP_DMA_LOC2EXT
PLP_DMA_LOC2EXT,
merge
);
merge = 1;
}
}
}
Expand Down
17 changes: 14 additions & 3 deletions mb/sparsesolve/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ LDFLAGS = -lm -lrt

build: sparsesolve

sparsesolve: sparsesolve.o mmio.o matrix.o cg.o ir.o oprecomp.o
sparsesolve: sparsesolve.o mmio.o matrix.o oprecomp.o
$(CC) $^ $(LDFLAGS) -o $@

clean:
Expand All @@ -21,9 +21,20 @@ tags: *.c *.h
ctags *.c *.h

test: sparsesolve
./sparsesolve data/prepared/mb/sparsesolve/bcsstk01.mtx 1 1e-7 10000 1e-7 100
./sparsesolve data/prepared/mb/sparsesolve/bcsstk01.mtx 1 1e-7 10000 1e-7 52

%.o: %.c cg.h vector.h matrix.h
plots: bcsstk01.pdf gr_30_30.pdf msc10848.pdf

%.pdf: %.eps
epstopdf $<

%.eps: sparsesolve.plt %.dat
gnuplot -c $^ $* > $@

%.dat: sparsesolve
for i in `seq 1 52`; do ./sparsesolve ../data/prepared/mb/sparsesolve/$*.mtx 10000 1e-7 8 $$i ; done | grep -v '#' > $@

%.o: %.c floatm.h vector.h matrix.h
$(CC) $(CFLAGS) -c $< -o $@

oprecomp.o: $(VPATH)../common/oprecomp.c
Expand Down
30 changes: 30 additions & 0 deletions mb/sparsesolve/floatm.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// FLOATM defaults to double unless it was already defined before this point.
// NOTE(review): presumably the reduced-precision working type -- confirm
// against the code that uses it.
#ifndef FLOATM
#define FLOATM double
#endif

// DOUBLE defaults to double unless it was already defined before this point.
#ifndef DOUBLE
#define DOUBLE double
#endif

// Reduce the precision of a double by clearing the low (52 - m) bits of its
// 64-bit representation, i.e. keeping only the `m` most significant of the
// 52 stored mantissa bits.
//
//   m  number of mantissa bits to keep; m >= 52 returns y unchanged.
//   y  value to truncate; NaN and infinities are returned unchanged.
//
// NOTE(review): assumes `double` uses the IEEE-754 binary64 layout -- confirm
// for any target where that might not hold.
static inline double double_truncate(uint8_t m, double y)
{
    if (m >= 52) return y; // nothing to do
    if (!isfinite(y)) return y; // NaN and Inf do not need conversion
    union { double d; uint64_t i; } c;
    c.d = y;
    // Build the mask in uint64_t: `1UL` is only 32 bits wide on ILP32/LLP64
    // targets, where shifting it by 32..52 would be undefined behaviour.
    c.i &= ~((((uint64_t)1) << (52 - m)) - (uint64_t)1);
    return c.d;
}

// Single-precision counterpart of the mantissa truncation: clears the low
// (23 - m) bits of the 32-bit representation of y, keeping the `m` most
// significant of the 23 stored mantissa bits.
//
//   m  number of mantissa bits to keep; m >= 23 returns y unchanged.
//   y  value to truncate; NaN and infinities are returned unchanged.
static inline float float_truncate(uint8_t m, float y)
{
    // Full precision requested, or a non-finite value that passes through.
    if (m >= 23 || !isfinite(y)) {
        return y;
    }

    union { float f; uint32_t i; } bits = { .f = y };
    const uint32_t dropped = (1U << (23 - m)) - 1U; // low bits to discard
    bits.i &= ~dropped;
    return bits.f;
}

// TRUNCATE(t, m, y) expands to a call of <t>_truncate(m, y), e.g.
// double_truncate or float_truncate. The extra TRUNCATE2 level lets `t` be
// macro-expanded first (for instance FLOATM -> double) before it is pasted
// onto `_truncate` with `##`.
#define TRUNCATE2(t, m, y) t ## _truncate(m, y)
#define TRUNCATE(t, m, y) TRUNCATE2(t, m, y)
Loading