# FastText+MultiSeg

Research conducted at Columbia University with Professor Smaranda Muresan. Written in C and C++, this is a high-performance NLP model that introduces morphology guidance into traditional fastText.
The two-year-long research was published at LREC. It was completed right when BERT came out, and had been revised from a short paper to a long paper at the request of reviewers. Eventually, with the research landscape shifting in the wake of BERT, we published this research at LREC so that we could move on to the next set of problems.

Feel free to reuse the following makefile for similar C/C++ codebases, which boast quick training runtimes in contrast to procedural programs. Please cite this work if you use it.

# ---- Toolchain and layout configuration -----------------------------------
# CXX        : C++ compiler driver used by the compile and link recipes.
# HEADER_DIR : directory containing the project's headers (added via -I).
# CXXFLAGS   : common compiler flags. -w silences every compiler warning;
#              the opt / coverage / debug goals append to this value.
CXX = c++
HEADER_DIR = include
CXXFLAGS = -pthread -w -std=c++14 -march=native -I$(HEADER_DIR)

# OBJDIR : output directory for object files and the googletest archive.
OBJDIR = bin

# GTEST_DIR : absolute path under which googletest is cloned and built.
# GTEST     : path, relative to GTEST_DIR, of the googletest source tree.
# $(CURDIR) is maintained by make itself and follows `make -C <dir>`, whereas
# $(PWD) is merely inherited from the calling shell's environment and can be
# stale or unset.
GTEST_DIR = $(CURDIR)/test/build
GTEST = googletest/googletest

# PWD_DIR is not referenced by any rule in this file; kept for compatibility.
PWD_DIR = $(CURDIR)

# OBJS : every object file linked into the final executable.
OBJS = $(addprefix $(OBJDIR)/, args.o dictionary.o evalparse.o matrix.o vector.o model.o utils.o biskip.o multiskip.o parfasttext.o )

# Top-level build flavours. Each one appends flavour-specific flags to
# CXXFLAGS (target-specific variables are inherited by every prerequisite
# built on behalf of the goal), then builds the executable followed by the
# unit-test runner.
# They are commands, not files, so they are declared phony: a stray file
# called `opt`, `coverage` or `debug` must never make the goal look up to date.
# `opt` is the first real target in the file and therefore the default goal.
.PHONY: opt coverage debug

# Optimised build.
opt: CXXFLAGS += -O3 -funroll-loops
opt: $(OBJDIR)/parfasttext test_build

# Unoptimised build instrumented for coverage measurement.
coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage
coverage: $(OBJDIR)/parfasttext test_build

# Unoptimised build with debug symbols.
debug: CXXFLAGS += -g -O0 -fno-inline
debug: $(OBJDIR)/parfasttext test_build

# Create the object output directory and, alongside it, the test log
# directory. `mkdir -p` is idempotent and creates missing parents, so it
# replaces the check-then-create shell test, which could fail when two jobs
# raced or when `test/` did not exist yet.
$(OBJDIR):
	mkdir -p "$@"
	mkdir -p test/log

# Compile every object listed in $(OBJS) from its matching src/<name>.cc.
# One static pattern rule replaces the ten hand-copied recipes. $(OBJDIR) is
# an order-only prerequisite (after `|`): the directory must exist before any
# compile starts -- also under `make -j` or when a single object is requested
# directly -- but its timestamp never triggers a rebuild.
# The extra `-w` two of the old recipes carried is already part of CXXFLAGS.
$(OBJS): $(OBJDIR)/%.o: src/%.cc | $(OBJDIR)
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Per-object header dependencies. These lines only add prerequisites; the
# recipe comes from the static pattern rule above.
$(OBJDIR)/args.o: $(HEADER_DIR)/args.h
$(OBJDIR)/dictionary.o: $(HEADER_DIR)/dictionary.h $(HEADER_DIR)/args.h
$(OBJDIR)/evalparse.o: $(HEADER_DIR)/evalparse.h
$(OBJDIR)/matrix.o: $(HEADER_DIR)/matrix.h $(HEADER_DIR)/utils.h
$(OBJDIR)/vector.o: $(HEADER_DIR)/vector.h $(HEADER_DIR)/utils.h
$(OBJDIR)/model.o: $(HEADER_DIR)/model.h $(HEADER_DIR)/args.h
$(OBJDIR)/utils.o: $(HEADER_DIR)/utils.h
$(OBJDIR)/biskip.o: $(HEADER_DIR)/biskip.h
$(OBJDIR)/multiskip.o: $(HEADER_DIR)/multiskip.h
# parfasttext.o is rebuilt whenever any project header changes.
$(OBJDIR)/parfasttext.o: $(HEADER_DIR)/*.h

# `$(OBJDIR)/parfasttext` is the name the opt / coverage / debug goals depend
# on, but no file of that name is ever produced: the link step writes `bitext`.
# It is therefore a phony alias that forwards to the real file target.
# $(OBJDIR) is listed first so the output directory is created before any
# object file is compiled.
.PHONY: $(OBJDIR)/parfasttext
$(OBJDIR)/parfasttext: $(OBJDIR) bitext

# Link the `bitext` executable from all objects plus src/main.cc, which is
# compiled as part of the link command and is therefore the real source
# prerequisite (src/parfasttext.cc is already covered by parfasttext.o).
# Because the target now names the file actually written, it is relinked only
# when an object or main.cc changes. Switching between opt / debug / coverage
# flags still needs `make clean` first, since existing objects are reused.
bitext: $(OBJS) src/main.cc
	$(CXX) $(CXXFLAGS) $(OBJS) -w src/main.cc -o $@

# Build the googletest static archive used by the unit-test runner.
#   1. Clone googletest into $(GTEST_DIR)/googletest unless it is already
#      there. `mkdir -p` tolerates an existing $(GTEST_DIR), so a previously
#      interrupted clone can be retried.
#   2. Strip the clone's git metadata and its top-level CMakeLists.txt.
#   3. Compile gtest-all.cc inside the googletest source tree (the object is
#      written to that directory because of the `cd`).
#   4. Archive the object into $@.
# The archive lives in $(OBJDIR), so that directory is an order-only
# prerequisite; without it `make test` on a clean tree failed in `ar`.
# NOTE(review): the clone tracks the repository's default branch unpinned;
# consider pinning a release tag for reproducible builds.
$(OBJDIR)/libgtest.a: | $(OBJDIR)
	if [ ! -d "$(GTEST_DIR)/googletest" ];then mkdir -p $(GTEST_DIR) && git clone https://github.com/google/googletest.git $(GTEST_DIR)/googletest ; fi
	rm -rf $(GTEST_DIR)/googletest/.git*
	rm -rf $(GTEST_DIR)/googletest/CMakeLists.txt
	cd $(GTEST_DIR)/$(GTEST) && $(CXX) $(CXXFLAGS) -isystem $(GTEST_DIR)/$(GTEST)/include -I$(GTEST_DIR)/$(GTEST) -pthread -c $(GTEST_DIR)/$(GTEST)/src/gtest-all.cc
	ar -rv $@ $(GTEST_DIR)/$(GTEST)/gtest-all.o

# Remove everything inside test/log (the directory itself is kept).
# Declared phony: this is a command, not a file to be produced.
.PHONY: test_log_clean
test_log_clean:
	rm -rf test/log/*

# Compile and link the unit-test runner `test_parfast` against the googletest
# archive. The target name is not the file that gets written, so it is
# declared phony; the runner is rebuilt on every request, exactly as before.
# $(CXX) replaces the hard-coded `g++` so the runner is built with the same
# compiler as libgtest.a, which it links against.
.PHONY: test_build
test_build: $(OBJDIR)/libgtest.a ./test/test_method.cc ./test/test_main.cc
	$(CXX) -std=c++11 -isystem $(GTEST_DIR)/$(GTEST)/include/ -pthread ./test/test_method.cc ./test/test_main.cc $(OBJDIR)/libgtest.a -w -o test_parfast

#test: test_log_clean test_build
#	time ./test_parfast

# Build the unit-test runner, then execute it under `time`.
# Must be phony: the project has a `test/` directory, and without this
# declaration make would treat that directory as the target's output file.
.PHONY: test
test: test_build
	time ./test_parfast

#clean:
#	rm -rf bin/*.o *.gcno *.gcda bitext test_parfast test_parfast1 bin test/log test/build

# clean:
# 	rm -rf bin/*.o *.gcno *.gcda bitext test_parfast test_parfast1 bin test/build

# Remove build products: object files, coverage data in the top-level
# directory, the `bitext` and `test_parfast` executables, the whole bin/
# directory and the googletest checkout under test/build.
# Declared phony so a file named `clean` can never disable it.
.PHONY: clean
clean:
	rm -rf bin/*.o *.gcno *.gcda bitext test_parfast bin test/build