diff --git a/Makefile.simple b/Makefile.simple
new file mode 100644
index 0000000..f13c840
--- /dev/null
+++ b/Makefile.simple
@@ -0,0 +1,98 @@
+# Simple Makefile for TRF
+# No autotools required - just a C compiler (gcc or clang) and GNU Make
+#
+# Every setting below can be overridden on the command line, e.g.
+#   make -f Makefile.simple CC=clang CFLAGS="-O3 -Wall"
+#   make -f Makefile.simple install PREFIX=/opt/trf
+
+# Remove a half-written target if its recipe fails
+.DELETE_ON_ERROR:
+
+# Make predefines CC as "cc"; fall back to gcc only when neither the
+# environment nor the command line chose a compiler.
+ifeq ($(origin CC),default)
+  CC = gcc
+endif
+
+# Note: -fno-align-loops removed as it's not supported by clang
+CFLAGS ?= -O2 -Wall -fno-align-functions
+# Required by the sources; kept out of CFLAGS so overriding CFLAGS cannot drop it
+REQUIRED_DEFS := -DUNIXCONSOLE
+# Linker options belong in LDFLAGS, libraries in LDLIBS (placed after the sources)
+LDFLAGS ?=
+LDLIBS ?= -lm
+
+# Platform detection (used for messages and the versioned binary name)
+UNAME_S := $(shell uname -s)
+UNAME_M := $(shell uname -m)
+
+# `uname -m` values that denote a 64-bit machine (includes ARM64 Linux/macOS)
+ARCH64 := x86_64 amd64 aarch64 arm64 ppc64 ppc64le s390x riscv64
+BITS := $(if $(filter $(ARCH64),$(UNAME_M)),64,32)
+
+# Fallbacks so an unrecognised system never yields an empty name
+PLATFORM := unknown
+TARGET := unknown
+
+ifeq ($(UNAME_S),Linux)
+  PLATFORM := linux
+  TARGET := linux$(BITS)
+endif
+
+ifeq ($(UNAME_S),Darwin)
+  PLATFORM := mac
+  TARGET := mac
+endif
+
+# Cygwin/MinGW/MSYS report names such as CYGWIN_NT-10.0 or MINGW64_NT-10.0
+ifneq ($(filter CYGWIN% MINGW% MSYS%,$(UNAME_S)),)
+  PLATFORM := windows
+  TARGET := dos$(BITS)
+endif
+
+VERSION = 4.10.0
+
+# Source files
+# Note: tr30dat.c is included via trfrun.h, so we only compile trf.c
+SRCDIR = src
+SOURCES = $(SRCDIR)/trf.c
+HEADERS = $(SRCDIR)/tr30dat.h $(SRCDIR)/tr30dat.c $(SRCDIR)/trfrun.h $(SRCDIR)/trfclean.h
+
+# Output binary
+BINARY = trf
+VERSIONED_BINARY = trf$(VERSION).$(TARGET).exe
+
+# Install location; DESTDIR supports staged (packaging) installs
+PREFIX ?= /usr/local
+BINDIR ?= $(PREFIX)/bin
+
+# Build target
+all: $(BINARY)
+
+$(BINARY): $(SOURCES) $(HEADERS)
+	@echo "Building TRF for $(PLATFORM) ($(TARGET))..."
+	$(CC) $(REQUIRED_DEFS) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(SOURCES) -o $@ $(LDLIBS)
+	@echo "Build complete: $@"
+
+# Create versioned binary (a real file target: re-copied only when stale)
+versioned: $(VERSIONED_BINARY)
+
+$(VERSIONED_BINARY): $(BINARY)
+	cp $< $@
+	@echo "Created versioned binary: $@"
+
+# Install (optional)
+install: $(BINARY)
+	install -d "$(DESTDIR)$(BINDIR)"
+	install -m 755 $(BINARY) "$(DESTDIR)$(BINDIR)/"
+
+# Clean
+clean:
+	rm -f $(BINARY) $(VERSIONED_BINARY)
+	@echo "Clean complete"
+
+# Test build
+test: $(BINARY)
+	./$(BINARY) -v
+
+.PHONY: all versioned install clean test
diff --git a/README.md b/README.md index 4fb3aff..f95f02c 100644 --- a/README.md +++ b/README.md @@ -61,52 +61,58 @@ To obtain current and/or earlier, pre-compiled versions of TRF: ## Instructions for Compiling ## To compile TRF, you will need a C compiler (e.g., gcc, clang) with the standard library installed. 
-We have tested compiling and installing TRF under UNIX-based systems (Linux, macOS) and for Windows under Cygwin/MinGW. +We have tested compiling and installing TRF under UNIX-based systems (Linux, macOS including Apple Silicon/ARM) and for Windows under Cygwin/MinGW. -Brief instructions (advanced): +### Simple Build (Recommended) ### + +The easiest way to build TRF requires only gcc/clang - no autotools needed: ```bash -# Check actual version -tar xzvf trf-4.10.0.tar.gz -cd trf-4.10.0 -mkdir build -cd build -../configure -make -# To install to system -sudo make install -# To copy binary elsewhere -cp src/trf DESTINATION +# Build with the simple script +./build.sh ``` -If you are cloning the repository, replace the first two lines above with: +That's it! The binary `trf` is ready to use. + +Alternatively, you can use the simple Makefile: ```bash -git clone https://github.com/Benson-Genomics-Lab/TRF.git -cd TRF +make -f Makefile.simple ``` -Step by step: +Or compile directly with a single command: -- Open a terminal window and change directory to the TRF directory created by a clone of this repository. -- Create a directory named 'build': `mkdir build` and change directory to that -- Type `../configure` in the terminal. -- Type `make` in the terminal. -- If you wish to install the binary, type `sudo make install` in the terminal. Otherwise, simply copy the binary from the `src` directory under `build`. - -This will: +```bash +gcc -O2 -Wall -fno-align-functions -DUNIXCONSOLE src/trf.c -o trf -lm +``` + +For more details on simplified building, see [BUILD.md](BUILD.md). + +### Traditional Build with Autotools ### -- compile the code -- place the executable version in the build/src directory. +If you prefer the traditional autotools build process: -The file will be called `trf` (`trf.exe` on Windows). For backwards compatibility with automated scripts exepcting TRF to follow a certain naming scheme, the installation will also create a symbolic link named `trf<version>.<platform>.exe`. 
+```bash +mkdir build +cd build +../configure +make + +# To install to system +sudo make install + +# Or to copy binary elsewhere +cp src/trf DESTINATION +``` + +The file will be called `trf` (`trf.exe` on Windows). For backwards compatibility with automated scripts expecting TRF to follow a certain naming scheme, the installation will also create a symbolic link named `trf<version>.<platform>.exe`. For example the file on a linux 64 bit operating system for version 4.10.0 will be called `trf4.10.0.linux64.exe`. ## Testing the Installation ## -Run the executable on the included test file `test_seqs.fasta`. (This assumes the executable has been named TRF.): +Run the executable on the included test file `t/test_seqs.fasta`: ```bash -trf test_seqs.fasta 2 5 7 80 10 50 2000 -l 10 +./trf t/test_seqs.fasta 2 5 7 80 10 50 2000 -l 10 ``` This should produce 9 files: ```bash @@ -362,6 +368,12 @@ Tandem Repeats Finder finds repeats for period sizes in the range from 1 to 2000 Some of these changes may be present in 4.09 and were undocumented, if they were caught during launch. They are officially present in 4.10.0. +**Build improvements:** + +* **ARM Mac (Apple Silicon) support:** TRF now compiles and runs natively on ARM-based Macs (M1, M2, M3, etc.) +* **Simplified build process:** New simple build script (`build.sh`) and Makefile (`Makefile.simple`) that require only gcc/clang - no autotools needed. See [BUILD.md](BUILD.md) for details. +* **One-command compilation:** TRF can now be built with a single gcc command for easy integration into containers and build systems. + Major: * TRF now has an exit status of 0 on success, and non-zero otherwise. 
diff --git a/build.sh b/build.sh
new file mode 100755
index 0000000..033c4a6
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Simple build script for TRF - no dependencies required
+# Just needs gcc (or clang) and standard C library
+#
+# Environment overrides: CC, CFLAGS, LDFLAGS, LDLIBS, e.g.
+#   CC=clang CFLAGS="-O3 -Wall" ./build.sh
+
+set -eu  # Exit on error or on use of an unset variable
+
+# Run from the directory holding this script so the relative source path
+# works no matter where the script is invoked from
+cd -- "$(dirname -- "$0")"
+
+# Detect platform
+UNAME_S=$(uname -s)
+UNAME_M=$(uname -m)
+
+# Word size of the machine (includes 64-bit ARM); only labels the build
+case "$UNAME_M" in
+    x86_64|amd64|aarch64|arm64|ppc64|ppc64le|s390x|riscv64) BITS=64 ;;
+    *) BITS=32 ;;
+esac
+
+case "$UNAME_S" in
+    Linux*)
+        PLATFORM="linux"
+        TARGET="linux$BITS"
+        ;;
+    Darwin*)
+        PLATFORM="mac"
+        TARGET="mac"
+        ;;
+    CYGWIN*|MINGW*|MSYS*)
+        PLATFORM="windows"
+        TARGET="dos$BITS"
+        ;;
+    *)
+        echo "Unknown platform: $UNAME_S" >&2
+        exit 1
+        ;;
+esac
+
+echo "Building TRF for $PLATFORM ($TARGET)..."
+
+# Compile flags (each may be overridden from the environment)
+# Note: -fno-align-loops removed as it's not supported by clang
+CFLAGS="${CFLAGS:--O2 -Wall -fno-align-functions}"
+LDFLAGS="${LDFLAGS:-}"
+LDLIBS="${LDLIBS:--lm}"
+
+# Use CC environment variable if set, otherwise default to gcc
+CC="${CC:-gcc}"
+
+# Build
+# The flag variables are deliberately unquoted so each option becomes its own word
+# shellcheck disable=SC2086
+$CC $CFLAGS -DUNIXCONSOLE $LDFLAGS src/trf.c -o trf $LDLIBS
+
+echo "Build complete: ./trf"
+echo "Test with: ./trf -v"
diff --git a/src/tr30dat.c b/src/tr30dat.c index f1e4189..38247fe 100644 --- a/src/tr30dat.c +++ b/src/tr30dat.c @@ -37,7 +37,7 @@ License along with TRF. If not, see . #define new1Darrayfunc(type,functionname,length)\ - type *functionname(int length)\ + type *functionname(size_t length)\ {\ type *objptr=calloc(length,sizeof(*objptr));\ if(objptr==NULL)\ diff --git a/src/tr30dat.h b/src/tr30dat.h index 5edc364..a8d8fd7 100644 --- a/src/tr30dat.h +++ b/src/tr30dat.h @@ -131,7 +131,7 @@ int counterInSeq = 0; //#endif /* 02/05/16 Y. Hernandez */ /* Since this is no longer a macro, use all lower case to avoid confusion. 
*/ -unsigned int maxwraplength = 0; +size_t maxwraplength = 0; /* Added by Yevgeniy Gelfand on Jan 27, 2010 */ /* To have smaller sequences not send results */ @@ -200,7 +200,7 @@ int *Index; int *ACGTcount; unsigned char *Sequence; -int Length; +size_t Length; /* int S[MAXWRAPLENGTH+1][MAXPATTERNSIZE];*/ int Delta; /* indel penalty */ @@ -223,10 +223,10 @@ int *Tag; /* list of tags for linking active distances */ int Toptag; /* last tag in list */ struct pairalign { - int length; - int score; - char *textprime, *textsecnd; - int * indexprime, *indexsecnd; + size_t length; + int score; + char *textprime, *textsecnd; + int *indexprime, *indexsecnd; } AlignPair; struct cons_data { @@ -247,8 +247,8 @@ struct cons_data { /*** new program started 11-29-95 **********/ struct bestperiodlistelement { - int indexhigh; - int indexlow; + size_t indexhigh; + size_t indexlow; int best1; int best2; int best3; @@ -258,14 +258,14 @@ struct bestperiodlistelement { } Bestperiodlist[1]; struct distanceentry { - int location; + size_t location; int size; }; struct distancelist { int k_run_sums_criteria, waiting_time_criteria, lo_d_range, hi_d_range; int numentries, nummatches; - int lowindex, highindex; + size_t lowindex, highindex; int linked; int linkdown, linkup; struct distanceentry *entry; @@ -279,18 +279,18 @@ struct distancelist { alignment in the same region with the same distance */ struct distanceseenarrayelement { - int index; - int end; + size_t index; + size_t end; int score; } * Distanceseenarray; struct distancelistelement { - int index; + size_t index; int distance; int changed_from_distance; /* use for test in search_for_distance_match_in_distanceseenlist 3/10/05 */ - int end; + size_t end; int score; int best_possible_score; /* number of copies X length X match weight */ int accepted; @@ -360,7 +360,7 @@ typedef struct { int redundoff; int ngs; int use_stdin; - unsigned int maxwraplength; + size_t maxwraplength; char inputfilename[_MAX_PATH]; /* constant defined in 
stdlib */ char outputprefix[_MAX_PATH]; @@ -491,11 +491,11 @@ int print_flanking = 0; typedef struct { /* Changed to unsigned Feb 16, 2016 Yozen */ - unsigned int length; - int composition[26]; - int nucleotides; - char name[MAXSEQNAMELEN]; - char * sequence; + size_t length; + int composition[26]; + int nucleotides; + char name[MAXSEQNAMELEN]; + char *sequence; } FASTASEQUENCE; 
diff --git a/src/trf.c b/src/trf.c
index 16505d7..cb12cf4 100644
--- a/src/trf.c
+++ b/src/trf.c
@@ -81,6 +81,7 @@ char* GetNamePartAddress(char* name);
 void PrintBanner(void);
 static int ParseInt(const char *str, int *dest);
 static int ParseUInt(const char *str, unsigned int *dest);
+static int ParseSize(const char *str, size_t *dest);
 
 int main(int ac, char** av)
 {
@@ -204,7 +205,7 @@ int main(int ac, char** av)
 			exit(2);
 		}
 
-		if (ParseUInt(av[8], &paramset.maxwraplength) == 0) {
+		if (ParseSize(av[8], &paramset.maxwraplength) == 0) {
 			fprintf(stderr, "Error while parsing max TR length (option '-L') value\n");
 			PrintBanner();
 			exit(1);
@@ -368,6 +369,43 @@ static int ParseUInt(const char *str, unsigned int *dest)
 	}
 }
 
+/*
+ * Parse a non-negative integer (decimal, 0x hex or leading-0 octal, as
+ * accepted by strtoull with base 0) from str into *dest.
+ *
+ * Returns 1 on success. Returns 0, leaving *dest untouched, when str is
+ * NULL or empty, has trailing characters, carries a minus sign (strtoull
+ * would silently wrap it to a huge value), or does not fit in a size_t.
+ */
+static int ParseSize(const char *str, size_t *dest)
+{
+	const char *p = str;
+	char *end;
+	unsigned long long val;
+
+	if (str == NULL || dest == NULL)
+		return 0;
+
+	/* skip the same leading whitespace strtoull skips, then refuse '-' */
+	while (*p == ' ' || (*p >= '\t' && *p <= '\r'))
+		p++;
+	if (*p == '-')
+		return 0;
+
+	errno = 0;
+	val = strtoull(str, &end, 0);
+
+	if (end == str || *end != '\0' || errno == ERANGE)
+		return 0;
+
+	/* size_t may be narrower than unsigned long long (32-bit builds) */
+	if (val > (size_t)-1)
+		return 0;
+
+	*dest = (size_t)val;
+	return 1;
+}
+
 void PrintBanner(void)
 {
 	fprintf(stderr,"\nTandem Repeats Finder, Version %s", versionstring);
diff --git a/src/trfclean.h b/src/trfclean.h index f48dd3e..d23669f 100644 --- a/src/trfclean.h +++ b/src/trfclean.h @@ -174,7 +174,7 @@ IL* GetList(char * datafile) for (counter =0; counter<3; counter++) fgets(hparameters, 255, fp); /* get hlength from another global variable (bad practice)*/ - sprintf(hlength, "Length: %d", Length); + sprintf(hlength, "Length: %zu", Length); /* loop to fill out list from buffer */ counter = 1; /* keeps track of order they are found */ diff --git a/src/trfrun.h b/src/trfrun.h index 2eba071..e19116e 
100644 --- a/src/trfrun.h +++ b/src/trfrun.h @@ -63,8 +63,8 @@ struct index_list { int count; /* indicates order in original file */ char ref[45]; /* records label for linking */ - int first; /* first index */ - int last; /* last index */ + size_t first; /* first index */ + size_t last; /* last index */ int period; /* period size */ float copies; /* number of copies */ int size; /* consensus size */ @@ -192,7 +192,7 @@ void TRFControlRoutine(void) { sprintf(hsequence,"Sequence: %s\n",seq.name); - sprintf(hlength,"Length: %d",seq.length); + sprintf(hlength,"Length: %zu",seq.length); paramset.multisequencefile = 0; paramset.sequenceordinal = 1; @@ -230,7 +230,7 @@ void TRFControlRoutine(void) { IL* lpointer; - int charcount; + size_t charcount; if (paramset.ngs != 1) { fprintf(destdfp,"Tandem Repeats Finder Program written by:\n\n"); @@ -256,7 +256,7 @@ void TRFControlRoutine(void) for(lpointer=GlobalIndexList;lpointer!=NULL;lpointer=lpointer->next) { - fprintf(destdfp,"%d %d %d %.1f %d %d %d %d %d %d %d %d %.2f %s ", + fprintf(destdfp,"%zu %zu %d %.1f %d %d %d %d %d %d %d %d %.2f %s ", lpointer->first, lpointer->last, lpointer->period, lpointer->copies, lpointer->size, lpointer->matches, lpointer->indels, lpointer->score, lpointer->acount, @@ -267,7 +267,7 @@ void TRFControlRoutine(void) /* print short flanks to .dat file */ if (paramset.ngs) { - int flankstart,flankend; + size_t flankstart,flankend; - flankstart = lpointer->first - 50; - flankstart=max(1,flankstart); + /* size_t cannot go negative: first - 50 would wrap to a huge value for first < 50 and max(1,...) would keep it, so clamp to 1 before subtracting */ + flankstart = (lpointer->first > 50) ? lpointer->first - 50 : 1; @@ -414,7 +414,7 @@ void TRFControlRoutine(void) { sprintf(hsequence,"Sequence: %s\n",seq.name); - sprintf(hlength,"Length: %d",seq.length); + sprintf(hlength,"Length: %zu",seq.length); /* set the prefix to be used for naming of output */ sprintf(input,"%s.s%d",prefix,i); @@ -434,7 +434,7 @@ void TRFControlRoutine(void) { IL* lpointer; - int charcount; + size_t charcount; /* only for the first one write the header */ if (i==1) { @@ -463,7 +463,7 @@ void TRFControlRoutine(void) 
for(lpointer=GlobalIndexList;lpointer!=NULL;lpointer=lpointer->next) { - fprintf(destdfp,"%d %d %d %.1f %d %d %d %d %d %d %d %d %.2f %s ", + fprintf(destdfp,"%zu %zu %d %.1f %d %d %d %d %d %d %d %d %.2f %s ", lpointer->first, lpointer->last, lpointer->period, lpointer->copies, lpointer->size, lpointer->matches, lpointer->indels, lpointer->score, lpointer->acount, @@ -474,7 +474,7 @@ void TRFControlRoutine(void) /* print short flanks to .dat file */ if (paramset.ngs) { - int flankstart,flankend; + size_t flankstart,flankend; - flankstart = lpointer->first - 50; - flankstart=max(1,flankstart); + /* size_t cannot go negative: first - 50 would wrap to a huge value for first < 50 and max(1,...) would keep it, so clamp to 1 before subtracting */ + flankstart = (lpointer->first > 50) ? lpointer->first - 50 : 1; @@ -796,7 +796,7 @@ void TRF(FASTASEQUENCE* pseq) Length=pseq->length; if (!paramset.HTMLoff) { - fprintf(Fptxt,"\n\nLength: %d",Length); + fprintf(Fptxt,"\n\nLength: %zu",Length); fprintf(Fptxt,"\nACGTcount: A:%3.2f, C:%3.2f, G:%3.2f, T:%3.2f\n\n", (double)pseq->composition['A'-'A']/Length, (double)pseq->composition['C'-'A']/Length, @@ -1076,7 +1076,9 @@ void SetProgressBar(void) ******************************************************/ int LoadSequenceFromFileBenson(FASTASEQUENCE *pseq,FILE* fp) { - int letter,i,pos1,length,next; + int letter,i,next; + long pos1; + size_t length; char *ptext; // read the FASTA '>' symbol @@ -1174,7 +1176,8 @@ int LoadSequenceFromFileBenson(FASTASEQUENCE *pseq,FILE* fp) int LoadSequenceFromFileEugene(FASTASEQUENCE *pseq, FILE* fp) { - int i, j, c; + size_t i, j; + int c; int next = -1; // whether a next sequence was encountered char *ptemp; char to_upper;