diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 9fb319005..ecf9032cd 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -160,7 +160,7 @@ jobs: run: | echo "CI_ONLY" $CI_ONLY if [ "$CI_ONLY" == "true" ]; then - MATRIX="[{'only':'cp313-manylinux_x86_64','os':'ubuntu-20.04'},{'only':'cp313-manylinux_aarch64','os':'ubuntu-22.04-arm'},{'only':'cp313-musllinux_aarch64','os':'ubuntu-22.04-arm'},{'only':'cp313-win_amd64','os':'windows-2019'},{'only':'cp313-win_arm64','os':'windows-2019'},{'only':'cp313-macosx_x86_64','os':'macos-13'}, {'only':'cp313-macosx_arm64','os':'macos-14'}]" + MATRIX="[{'only':'cp313-manylinux_x86_64','os':'ubuntu-20.04'},{'only':'cp313-manylinux_aarch64','os':'ubuntu-22.04-arm'},{'only':'cp313-musllinux_x86_64','os':'ubuntu-22.04'},{'only':'cp313-musllinux_aarch64','os':'ubuntu-22.04-arm'},{'only':'cp313-win_amd64','os':'windows-2019'},{'only':'cp313-win_arm64','os':'windows-2019'},{'only':'cp313-macosx_x86_64','os':'macos-13'}, {'only':'cp313-macosx_arm64','os':'macos-14'}]" else MATRIX=$( { diff --git a/pyproject.toml b/pyproject.toml index ed5043b70..04f57b2a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,7 +88,6 @@ skip = ["*-win32", "*_i686", # skip 32-bit builds "pp37-*", # skip certain PyPy configurations "pp*_aarch64 ", # no numpy wheels for aarch64 on PyPy "pp311-*", # no numpy wheels for PyPy 3.11 - "cp313-musllinux_x86_64", # no lxml wheels for musllinux_1_1 on cp313 "cp36-musllinux_*", "cp37-musllinux_*", "cp38-musllinux_*"] # older musllinux missing numpy wheels test-skip = ["*-win_arm64", "cp38-macosx_arm64"] test-extras = ["test"] @@ -109,6 +108,11 @@ build-frontend = { name = "pip", args = ["--only-binary=:none:"] } archs = ["auto", "aarch64"] # default linux wheels use CentOS-based runners, local cibuildwheel docker before-all = ["yum install -y gsl-devel"] +# force use of newer `manylinux_2_28`: in cibuildwheel >= 3.0 it will be the 
default +manylinux-x86_64-image = "manylinux_2_28" +manylinux-pypy_x86_64-image = "manylinux_2_28" +manylinux-aarch64-image = "manylinux_2_28" +manylinux-pypy_aarch64-image = "manylinux_2_28" [[tool.cibuildwheel.overrides]] # run the benchmarking only on cp313 @@ -121,14 +125,10 @@ test-command = "pytest --pval-benchmarking -v {package}/tests ${PYTEST_OPTIONS}" # musl uses apk/apt select = "*musllinux*" before-all = ["apk add gsl-dev"] -# restore musllinux_1_1 image for the time being (musllinux_1_2 segfaults)P -musllinux-x86_64-image="musllinux_1_1" - -[[tool.cibuildwheel.overrides]] -# use the recent `muslinux_1_2` on cp313 -# lxml for musllinux_1_1 does not exist for Python 3.13 -select = "cp313-musllinux_x86_64" -musllinux-x86_64-image="musllinux_1_2" +# FIXME: if we want to run the full tests, need to enable the below to force +# musllinux_1_1 image for the time being on x86_64 (musllinux_1_2 segfaults on +# any test involving emhaplofreq) +# musllinux-x86_64-image="musllinux_1_1" [tool.cibuildwheel.macos] # use oras to get archived 2.7.1 bottles of `gsl` that has 10.15 (on x86) / 11.0 (on arm64) builds diff --git a/src/PyPop/Haplo.py b/src/PyPop/Haplo.py index 64b1860f1..d1ec65285 100644 --- a/src/PyPop/Haplo.py +++ b/src/PyPop/Haplo.py @@ -372,7 +372,7 @@ def _runEmhaplofreq( permutationFlag=None, permutationPrintFlag=0, numInitCond=50, - numPermutations=1001, + numPermutations=1, numPermuInitCond=5, haploSuppressFlag=None, showHaplo=None, @@ -384,7 +384,8 @@ def _runEmhaplofreq( Format of 'locusKeys' is a string as per estHaplotypes(): - permutationFlag: sets whether permutation test will be - performed. No default. + performed. No default. This should only be set if + numPermutations is non-zero. - permutationPrintFlag: sets whether the result from permutation output run will be included in the output XML. @@ -394,7 +395,7 @@ def _runEmhaplofreq( performing the permutation test. Default: 50. 
- numPermutations: sets number of permutations that will be - performed if 'permutationFlag' *is* set. Default: 1001. + performed if 'permutationFlag' *is* set. Default: 1. - numPermuInitConds: sets number of initial conditions tried per-permutation. Default: 5. @@ -664,6 +665,7 @@ def allPairwise( else: permuMode = "no-permu" permutationFlag = 0 + numPermutations = 1 # FIXME: this translates to being max_permu in C program, needs to be at least one if mode is None: mode = "all-pairwise-ld-" + permuMode diff --git a/src/emhaplofreq/emhaplofreq.c b/src/emhaplofreq/emhaplofreq.c index 6dbdf18bd..41188f6c3 100644 --- a/src/emhaplofreq/emhaplofreq.c +++ b/src/emhaplofreq/emhaplofreq.c @@ -59,8 +59,8 @@ FILE *parse_args(int, char **, int *); /* argc, argv */ /* returns open filehandle of input file */ -int read_infile(FILE *, char[MAX_ROWS][NAME_LEN], - char[MAX_ROWS][MAX_COLS][NAME_LEN], int *, char[1], char[1]); +int read_infile(FILE *, char (*)[NAME_LEN], char (*)[MAX_COLS][NAME_LEN], int *, + char[1], char[1]); /* open filehandle for data, ref array, data array, number of records */ /* returns number of loci */ @@ -70,8 +70,8 @@ int main_proc( #else FILE *fp_out, #endif - char[MAX_ROWS][MAX_COLS][NAME_LEN], int, int, int, int, int, int, int, int, - int, char[1], char[1]); + char (*)[MAX_COLS][NAME_LEN], int, int, int, int, int, int, int, int, int, + char GENOTYPE_SEPARATOR[], char GENOTYPE_TERMINATOR[]); /* data array, number of loci, number of records */ /* main procedure that handles memory allocation and creation of arrays, * spawns the rest of the data preparation and processing functions, @@ -115,20 +115,21 @@ void linkage_diseq(FILE *, double *, int (*)[MAX_LOCI], double (*)[MAX_ALLELES], * compute LD coefficients */ -void sort2bychar(char (*)[], double *, int); +void sort2bychar(char (*)[LINE_LEN / 2], double *, int); /* haplo array, mle array, no. 
of haplotypes */ /* * insertion sort in ascending order for 1st array also applied to 2nd array */ -void sort2byfloat(char (*)[], double *, int); +void sort2byfloat(char (*)[LINE_LEN / 2], double *, int); /* haplo array, mle array, no. of haplotypes */ /* * insertion sort in ascending order for 2nd array also applied to 1st array */ void emcalc(int *, int *, double *, double *, int, int, int, int, int *, - int (*)[], int *, int *, double *, double *, int, int (*)[]); + int (*)[2], int *, int *, double *, double *, int, + int (*)[MAX_ROWS]); /* numgeno, obspheno, freq_zero, mle, n_haplo, n_unique_geno, n_unique_pheno, n_recs, xhaplo, xgeno, error_flag, iter_count, loglike, permu, gp */ @@ -136,14 +137,15 @@ void emcalc(int *, int *, double *, double *, int, int, int, int, int *, * perform EM iterations with results in the mle array */ -void haplo_freqs_no_ld(double *, double (*)[], int (*)[], int *, int, int); +void haplo_freqs_no_ld(double *, double (*)[MAX_ALLELES], int (*)[MAX_LOCI], + int *, int, int); /* freqs, allele_freq, haplocus, n_unique_allele, n_loci, n_haplo */ /* * compute haplotype frequencies under no LD as products of allele frequencies */ -double loglikelihood(int *, double *, int *, int, int, int, int *, int (*)[], - int, int (*)[]); +double loglikelihood(int *, double *, int *, int, int, int, int *, int (*)[2], + int, int (*)[MAX_ROWS]); /* numgeno, hap_freq, n_haplo, n_unique_geno, n_unique_pheno, xhaplo, xgeno, * permu, gp */ /* @@ -414,8 +416,13 @@ int main_proc( int permu_count; // RS 20031125 double lr_mean, lr_sd, lr_z; - CALLOC_ARRAY_DIM1(double, like_ratio, max_permu); - CALLOC_ARRAY_DIM1(int, error_flag_permu, max_permu); // RS 20031125 + /* FIXME: size of array needs to be at least one because there is always at least + * one like_ratio */ + /* this should ideally use the logic further down which sets max_permutations + to 1 if permutation test not run */ + int permu_alloc_size = (max_permu > 0) ? 
max_permu : 1; + CALLOC_ARRAY_DIM1(double, like_ratio, permu_alloc_size); + CALLOC_ARRAY_DIM1(int, error_flag_permu, permu_alloc_size); // RS 20031125 double pvalue = 0.0; @@ -501,7 +508,8 @@ int main_proc( n_hetero++; if (strcmp(data_ar[0][col_0], data_ar[0][col_1]) > 0) { strcpy(buff, data_ar[0][col_0]); - strcpy(data_ar[0][col_0], data_ar[0][col_1]); + memmove(data_ar[0][col_0], data_ar[0][col_1], + strlen(data_ar[0][col_1]) + 1); strcpy(data_ar[0][col_1], buff); } } @@ -563,7 +571,8 @@ int main_proc( if ((strcmp(data_ar[obs][col_0], data_ar[obs][col_1])) > 0) { strcpy(buff, data_ar[obs][col_0]); - strcpy(data_ar[obs][col_0], data_ar[obs][col_1]); + memmove(data_ar[obs][col_0], data_ar[obs][col_1], + strlen(data_ar[obs][col_1]) + 1); strcpy(data_ar[obs][col_1], buff); } @@ -1380,7 +1389,7 @@ void linkage_diseq(FILE *fp_out, double(*mle), int (*hl)[MAX_LOCI], /* af: allele_frequencies array */ { int i, j, k, l, m, coeff_count = 0; - double dmax, norm_dij = 0.0; + double dmax = 0.0, norm_dij = 0.0; static double dij[MAX_LOCI * (MAX_LOCI - 1) / 2][MAX_ALLELES][MAX_ALLELES]; diff --git a/tests/base.py b/tests/base.py index 13feeee26..95af3523f 100644 --- a/tests/base.py +++ b/tests/base.py @@ -34,6 +34,7 @@ """This is a class of common functions for running PyPop tests""" import os.path +import platform import shutil import subprocess import sys @@ -49,6 +50,84 @@ reason="certain tests currently fail on windows due to minor numerical issues", ) + +def is_check_musllinux_enabled(): + return False + + +# FIXME: this is a somewhat hacky check to see if on musllinux +def is_musllinux(): + """Check if running on a musl-based Linux system.""" + + if sys.platform != "linux": + return False + + # Check if `ldd` output contains 'musl' + try: + output = subprocess.check_output( + ["ldd", "--version"], stderr=subprocess.STDOUT, text=True + ) + if "musl" in output: + return True + except Exception: + pass # `ldd` might not exist + + # Fallback: Check if running on Alpine 
(common musl distribution) + try: + with open("/etc/os-release") as f: + if "ID=alpine" in f.read(): + return True + except FileNotFoundError: + pass + + return False + + +def debug_musllinux_check(): + if not is_check_musllinux_enabled(): + return + + """Print debug info about musllinux detection.""" + print("=== musllinux detection debug ===") + print(f"sys.platform: {sys.platform}") + print(f"platform.libc_ver(): {platform.libc_ver()}") + print(f"platform.machine(): {platform.machine()}") + print("===============================") + + # Run `ldd --version` to check if it's musl + try: + output = subprocess.check_output( + ["ldd", "--version"], stderr=subprocess.STDOUT, text=True + ) + print("ldd --version output:") + print(output.strip()) + except Exception as e: + print(f"ldd check failed: {e}") + + # Check `/etc/os-release` + try: + with open("/etc/os-release") as f: + print("/etc/os-release contents:") + print(f.read().strip()) + except FileNotFoundError: + print("/etc/os-release not found") + + print("===============================") + + if is_musllinux() and platform.machine() == "x86_64": + print("Skipping test due to musllinux_1_2 on x86_64") + + +# call the debug function before applying the skip +debug_musllinux_check() + +# global skip condition for musllinux on x86_64 +# FIXME: currently disabled, to re-enable, change "False" to "True" in condition +skip_musllinux_x86_64 = pytest.mark.skipif( + is_check_musllinux_enabled() and is_musllinux() and platform.machine() == "x86_64", + reason="certain tests segfault or fail on musllinux/x86_64, so skipping for now", +) + CUR_DIR = Path(__file__).parent.resolve() PARENT_DIR = Path(CUR_DIR) / ".." 
sys.path.append(PARENT_DIR) diff --git a/tests/test_AlleleColon.py b/tests/test_AlleleColon.py index c2b5697a1..63c26dd4f 100644 --- a/tests/test_AlleleColon.py +++ b/tests/test_AlleleColon.py @@ -3,6 +3,7 @@ from base import ( in_temp_dir, # noqa: F401 run_pypop_process, + skip_musllinux_x86_64, ) @@ -21,6 +22,7 @@ def test_AlleleColon_HardyWeinberg(): ) +@skip_musllinux_x86_64 def test_AlleleColon_Emhaplofreq(): exit_code = run_pypop_process( "./tests/data/Test_Allele_Colon_Emhaplofreq.ini", diff --git a/tests/test_ManyOptions.py b/tests/test_ManyOptions.py index 389c6b30c..63f3a17ec 100644 --- a/tests/test_ManyOptions.py +++ b/tests/test_ManyOptions.py @@ -4,10 +4,12 @@ from base import ( in_temp_dir, # noqa: F401 run_pypop_process, + skip_musllinux_x86_64, ) @pytest.mark.slow +@skip_musllinux_x86_64 def test_ManyOptions(): generated_filenames = [ "BIGDAWG_SynthControl_Data_with_metadata-" + suffix diff --git a/tests/test_USAFEL.py b/tests/test_USAFEL.py index c6baafb88..6a311db76 100644 --- a/tests/test_USAFEL.py +++ b/tests/test_USAFEL.py @@ -9,6 +9,7 @@ filecmp_list_of_files, in_temp_dir, # noqa: F401 run_pypop_process, + skip_musllinux_x86_64, xfail_windows, ) @@ -83,6 +84,7 @@ def test_USAFEL_slatkin_guothompson(): assert filecmp_ignore_newlines(out_filename, gold_out_filename) +@skip_musllinux_x86_64 def test_USAFEL_slatkin_guothompson_emhaplofreq(): exit_code = run_pypop_process( "./tests/data/minimal.ini", "./tests/data/USAFEL-UchiTelle-small.pop" @@ -98,6 +100,7 @@ def test_USAFEL_slatkin_guothompson_emhaplofreq(): # FIXME: error in one-line of 2-locus-haplo.tsv on Windows # ld.d is 0.01563 rather than 0.01562 @xfail_windows +@skip_musllinux_x86_64 def test_USAFEL_slatkin_guothompson_emhaplofreq_with_permu_tsv(): exit_code = run_pypop_process( "./tests/data/minimal-with-permu.ini", diff --git a/tests/test_UserGuideExamples.py b/tests/test_UserGuideExamples.py index f833b1756..3ebf596dd 100644 --- a/tests/test_UserGuideExamples.py +++ 
b/tests/test_UserGuideExamples.py @@ -1,9 +1,11 @@ from base import ( in_temp_dir, # noqa: F401 run_pypop_process, + skip_musllinux_x86_64, ) +@skip_musllinux_x86_64 def test_DataMinimalNoHeaderNoIDs(): exit_code = run_pypop_process( "./tests/data/doc-examples/config-minimal-example.ini", @@ -13,6 +15,7 @@ def test_DataMinimalNoHeaderNoIDs(): assert exit_code == 0 +@skip_musllinux_x86_64 def test_DataMinimalNoHeader(): exit_code = run_pypop_process( "./tests/data/doc-examples/config-minimal-example.ini",