Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 10 additions & 73 deletions external_libs/pgenlib/include/pgenlib_misc.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,
// This library is part of PLINK 2.0, copyright (C) 2005-2025 Shaun Purcell,
// Christopher Chang.
//
// This library is free software: you can redistribute it and/or modify it
Expand All @@ -14,13 +14,20 @@
// You should have received a copy of the GNU Lesser General Public License
// along with this library. If not, see <http://www.gnu.org/licenses/>.


#include "pgenlib_misc.h"

#include <limits.h>

#ifdef __cplusplus
namespace plink2 {
#endif

#ifndef NDEBUG
char* g_pgl_errbuf = nullptr;
char* g_pgl_errbuf_write_iter = nullptr;
char* g_pgl_errbuf_end = nullptr;
#endif

#ifdef USE_AVX2
void CopyNyparrNonemptySubset(const uintptr_t* __restrict raw_nyparr, const uintptr_t* __restrict subset_mask, uint32_t raw_nyparr_entry_ct, uint32_t subset_entry_ct, uintptr_t* __restrict output_nyparr) {
if (subset_entry_ct == raw_nyparr_entry_ct) {
Expand Down Expand Up @@ -1244,6 +1251,7 @@ void TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, ui
// and target_4567 to
// _ (4, 0) _ (5, 0) _ (6, 0) _ (7, 0) _ (4, 1) _ (5, 1) _ (6, 1) ...
// This is perfectly arranged for movemask.
// todo: better ARM implementation
VecW target_4567 = vecw_blendv(loader, vecw_srli(loader, 7), m8);
target_iter7[vidx] = vecw_movemask(target_4567);
target_4567 = vecw_slli(target_4567, 2);
Expand Down Expand Up @@ -3261,77 +3269,6 @@ void ClearGenoarrMissing1bit8Unsafe(const uintptr_t* __restrict genoarr, uint32_
}
}

void ClearGenoarrMissing1bit16Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals) {
const uint32_t orig_subset_size = *subset_sizep;
Halfword* subset_alias = DowncastWToHW(subset);
uint32_t read_idx = 0;
// deliberate overflow
for (uint32_t read_widx = UINT32_MAX; ; ) {
uint32_t subset_bits;
do {
subset_bits = subset_alias[++read_widx];
} while (!subset_bits);
uintptr_t detect_11 = genoarr[read_widx];
detect_11 = detect_11 & (detect_11 >> 1) & kMask5555;
if (detect_11) {
uint32_t detect_11_hw = PackWordToHalfword(detect_11);
const uint32_t joint_u32 = subset_bits & detect_11_hw;
if (joint_u32) {
uintptr_t lowbit = joint_u32 & (-joint_u32);
uint32_t write_idx = read_idx + PopcountWord(subset_bits & (lowbit - 1));
read_idx = write_idx + 1;
uint32_t subset_bits_write = subset_bits ^ lowbit;
uint16_t* sparse_vals_u16 = S_CAST(uint16_t*, sparse_vals);
subset_bits &= -(2 * lowbit);
for (; read_idx != orig_subset_size; ++read_idx) {
#ifdef USE_AVX2
if (!subset_bits) {
subset_alias[read_widx] = subset_bits_write;
do {
subset_bits = subset_alias[++read_widx];
} while (!subset_bits);
subset_bits_write = subset_bits;
detect_11 = genoarr[read_widx];
detect_11 = detect_11 & (detect_11 >> 1);
detect_11_hw = PackWordToHalfwordMask5555(detect_11);
}
lowbit = subset_bits & (-subset_bits);
subset_bits ^= lowbit;
if (lowbit & detect_11_hw) {
subset_bits_write ^= lowbit;
continue;
}
#else
if (!subset_bits) {
subset_alias[read_widx] = subset_bits_write;
do {
subset_bits = subset_alias[++read_widx];
} while (!subset_bits);
subset_bits_write = subset_bits;
detect_11 = genoarr[read_widx];
detect_11 = detect_11 & (detect_11 >> 1);
}
lowbit = subset_bits & (-subset_bits);
subset_bits ^= lowbit;
if ((lowbit * lowbit) & detect_11) {
subset_bits_write ^= lowbit;
continue;
}
#endif
sparse_vals_u16[write_idx++] = sparse_vals_u16[read_idx];
}
subset_alias[read_widx] = subset_bits_write;
*subset_sizep = write_idx;
return;
}
}
read_idx += PopcountWord(subset_bits);
if (read_idx == orig_subset_size) {
return;
}
}
}

double u127prod_diff_d(uint64_t plus_term0, uint64_t plus_term1, uint64_t minus_term0, uint64_t minus_term1) {
uint64_t plus_hi;
const uint64_t plus_lo = multiply64to128(plus_term0, plus_term1, &plus_hi);
Expand Down
86 changes: 82 additions & 4 deletions external_libs/pgenlib/include/pgenlib_misc.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef __PGENLIB_MISC_H__
#define __PGENLIB_MISC_H__

// This library is part of PLINK 2.0, copyright (C) 2005-2024 Shaun Purcell,
// This library is part of PLINK 2.0, copyright (C) 2005-2025 Shaun Purcell,
// Christopher Chang.
//
// This library is free software: you can redistribute it and/or modify it
Expand Down Expand Up @@ -74,17 +74,97 @@
// on the fly, since that tends to be faster than having to access twice as
// much memory.

#include <assert.h>
#ifndef NDEBUG
# include <stdarg.h> // va_start()
#endif
#include <stdlib.h>
#include <string.h>

#include "plink2_base.h"
#include "plink2_bits.h"

// 10000 * major + 100 * minor + patch
// Exception to CONSTI32, since we want the preprocessor to have access to this
// value. Named with all caps as a consequence.
#define PGENLIB_INTERNAL_VERNUM 2003
#define PGENLIB_INTERNAL_VERNUM 2100

#ifdef __cplusplus
namespace plink2 {
#endif

#ifdef NDEBUG
HEADER_INLINE BoolErr PglInitLog(__attribute__((unused)) uintptr_t log_capacity) {
return 0;
}

HEADER_INLINE void PglLogprintf(__attribute__((unused)) const char* fmt, ...) {
}

HEADER_INLINE char* PglReturnLog() {
return nullptr;
}

HEADER_INLINE void PglResetLog() {
}
#else
extern char* g_pgl_errbuf;
extern char* g_pgl_errbuf_write_iter;
extern char* g_pgl_errbuf_end;

HEADER_INLINE BoolErr PglInitLog(uintptr_t log_capacity) {
if (g_pgl_errbuf) {
if (log_capacity <= S_CAST(uintptr_t, g_pgl_errbuf_end - g_pgl_errbuf)) {
// existing allocation is fine. no need to shrink
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
return 0;
}
free(g_pgl_errbuf);
}
// 128 extra bytes to make WordWrapMultiline() safe.
g_pgl_errbuf = S_CAST(char*, malloc(log_capacity + 128));
if (!g_pgl_errbuf) {
g_pgl_errbuf_write_iter = nullptr;
g_pgl_errbuf_end = nullptr;
// in the unlikely event this comes up, caller is free to ignore the error,
// logging just won't happen. though, if malloc is failing, something else
// is likely to fail...
return 1;
}
g_pgl_errbuf_write_iter = g_pgl_errbuf;
*g_pgl_errbuf_write_iter = '\0';
g_pgl_errbuf_end = &(g_pgl_errbuf[log_capacity]);
return 0;
}

HEADER_INLINE void PglLogprintf(const char* fmt, ...) {
// possible todo: log levels
if (g_pgl_errbuf_write_iter != nullptr) {
va_list args;
va_start(args, fmt);
const uintptr_t remaining_space = g_pgl_errbuf_end - g_pgl_errbuf_write_iter;
const uintptr_t intended_slen = vsnprintf(g_pgl_errbuf_write_iter, remaining_space, fmt, args);
if (intended_slen < remaining_space) {
g_pgl_errbuf_write_iter = &(g_pgl_errbuf_write_iter[intended_slen]);
} else {
g_pgl_errbuf_write_iter = g_pgl_errbuf_end;
}
}
}

HEADER_INLINE char* PglReturnLog() {
return g_pgl_errbuf;
}

HEADER_INLINE void PglResetLog() {
if (g_pgl_errbuf) {
g_pgl_errbuf_write_iter = g_pgl_errbuf;
g_pgl_errbuf_write_iter[0] = '\0';
}
}
#endif

// other configuration-ish values needed by plink2_common subset
typedef unsigned char AlleleCode;
typedef uint16_t DoubleAlleleCode;
Expand Down Expand Up @@ -574,8 +654,6 @@ HEADER_INLINE uint32_t GenoIter1x(const uintptr_t* __restrict genoarr, uintptr_t
// sparse_vals entries.
void ClearGenoarrMissing1bit8Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals);

void ClearGenoarrMissing1bit16Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals);

// See EasyasPi's answer to
// https://stackoverflow.com/questions/25095741/how-can-i-multiply-64-bit-operands-and-get-128-bit-result-portably
HEADER_INLINE uint64_t multiply64to128(uint64_t lhs, uint64_t rhs, uint64_t* high) {
Expand Down
Loading