From c0aa41b6d4df497b6f9522cebd9940642da3c3de Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Thu, 4 Dec 2025 16:38:19 -0600
Subject: [PATCH 1/8] Add files via upload

---
 msDMS_Saleem&Miller_2025/2b+c_final.py        | 133 +++++++
 msDMS_Saleem&Miller_2025/2d_final.py          |  75 ++++
 msDMS_Saleem&Miller_2025/3b_final.py          |  80 ++++
 msDMS_Saleem&Miller_2025/3d_calcs_final.py    | 103 +++++
 msDMS_Saleem&Miller_2025/4c+e.py              | 367 ++++++++++++++++++
 msDMS_Saleem&Miller_2025/Figure_1B_final.py   |  42 ++
 msDMS_Saleem&Miller_2025/SI1_C_final.py       |  46 +++
 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py   | 108 ++++++
 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py   |  27 ++
 msDMS_Saleem&Miller_2025/figure1c_final.py    |  35 ++
 msDMS_Saleem&Miller_2025/random_ringsample.py | 103 +++++
 .../si1_e_is_corrcode.txt                     |   0
 .../si2_abcf_is corrcode.txt                  |   0
 msDMS_Saleem&Miller_2025/si2_d+e_final.py     |  67 ++++
 msDMS_Saleem&Miller_2025/si2_g_final.py       |  60 +++
 msDMS_Saleem&Miller_2025/si2_h+i_final.py     | 238 ++++++++++++
 msDMS_Saleem&Miller_2025/si3_ac_calcs.py      |  96 +++++
 msDMS_Saleem&Miller_2025/si3_f_final.py       |  69 ++++
 msDMS_Saleem&Miller_2025/si6_a_final.py       |  99 +++++
 19 files changed, 1748 insertions(+)
 create mode 100644 msDMS_Saleem&Miller_2025/2b+c_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/2d_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/3b_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/3d_calcs_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/4c+e.py
 create mode 100644 msDMS_Saleem&Miller_2025/Figure_1B_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SI1_C_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/figure1c_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/random_ringsample.py
 create mode 100644 msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt
 create mode 100644 msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt
 create mode 100644 msDMS_Saleem&Miller_2025/si2_d+e_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_g_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_h+i_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si3_ac_calcs.py
 create mode 100644 msDMS_Saleem&Miller_2025/si3_f_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si6_a_final.py

diff --git a/msDMS_Saleem&Miller_2025/2b+c_final.py b/msDMS_Saleem&Miller_2025/2b+c_final.py
new file mode 100644
index 0000000..f90b8ef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2b+c_final.py
@@ -0,0 +1,133 @@
+from ReactivityProfile import ReactivityProfile as rprof
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+
+def reactivityByNt(self, resnums = None, nts=None, name = None):
+        """ return a list of reactivities for a given set of nts (array), or nt type"""
+        
+        pro = self.profile(name)
+        
+        mask = np.isfinite(pro)
+        #with np.errstate(invalid='ignore'):
+        #    mask = mask & (pro > -0.3) & (pro < 4.0)
+
+        try:
+            ntit = iter(nts)
+            ntmask = (self.sequence == next(ntit))
+            for n in ntit:
+                ntmask = ntmask | (self.sequence == n) 
+            
+            mask = mask & ntmask
+        except TypeError:
+            pass
+
+        try:
+            resnums = set(resnums)
+            mask = mask & np.array([i in resnums for i in self.nts])
+        except TypeError:
+            pass
+
+        return pro[mask]
+
+
+def mutHistogram(self, name = None,name_2 = None, name_3=None, nts = None, resnums = None,
+                     bins=100,axes = None, pool=False, writename='mutHist.pdf', **kwargs):
+        
+
+        
+        write = False
+        if axes is None:
+            fig, axes = plot.subplots()
+            write = True
+
+        if pool:
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            
+            bins=np.histogram(np.hstack((rxn,rxn_2,rxn_3)), bins=(np.arange(0.0, 10)))[1]
+            
+            axes.hist(rxn, histtype = 'step', bins = bins, label='InCell', **kwargs)
+            
+            axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+            
+            axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+        
+        else:
+        
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))        
+            axes.hist(rxn, histtype = 'step', bins = bins, label='In Cell', **kwargs)
+
+            if name is not None:
+
+                rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+        
+            if name_2 is not None:
+
+                rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            if name_3 is not None:
+
+                rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+
+
+        
+
+        axes.legend(loc='upper left')
+        axes.set_ylabel('Count')
+        axes.set_xlabel('Norm N7 reactivity')
+        axes.set_xlim(-3.0, 3.0)
+        axes.set_ylim(0,350)
+        axes.yaxis.set_ticks(np.arange(0, 375, 25))
+        axes.set_title('23s')
+
+        if write:
+            plot.savefig(writename)
+
+
+
+
+def winsorize(prof):
+    for i, v in enumerate(prof):
+        if prof[i] > 3:
+            prof[i] = 3
+        if prof[i] < -3:
+            prof[i] = -3
+
+    return prof
+
+
+if __name__ == '__main__':
+    ntorder = ('A','C','G','U')
+
+    masknt = ('A', 'C', 'U')
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('--incell')
+    argparser.add_argument('--cf')
+    argparser.add_argument('--cfnomg')
+    argparser.add_argument('--urea')
+    args = argparser.parse_args()
+    #read in reactivity profiles
+    incell = rprof(args.incell)
+    cellfree = rprof(args.cf)
+    cf_nomg = rprof(args.cfnomg)
+    urea = rprof(args.urea)
+
+    incell.normprofile = winsorize(incell.normprofile)
+    cellfree.normprofile = winsorize(cellfree.normprofile)
+    urea.normprofile = winsorize(urea.normprofile)
+    cf_nomg.normprofile= winsorize(cf_nomg.normprofile)
+
+    #pass in incell first, then denatured
+    mutHistogram(incell,cellfree, urea,cf_nomg, bins=np.linspace(-3.0, 3.0, num=25),writename ='Fig2B_23s_InCellvsCFvsUrea_norm_top_08052024.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/2d_final.py b/msDMS_Saleem&Miller_2025/2d_final.py
new file mode 100644
index 0000000..31e4df4
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2d_final.py
@@ -0,0 +1,75 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7prof')
+argparser.add_argumnet('--sasa')
+args = argparser.parse_args()
+
+
+
+n7 = rprofile(args.n7prof)
+
+infile = args.sasa
+
+
+# read in the sasa file output from pymol script
+sasa = {}
+with open(infile) as inp:
+    for line in inp:
+        spl = line.split()
+        sasa[int(spl[0])] = float(spl[1])
+
+
+prot_cut = 1.6
+sasa_cut = 1
+
+x = 0
+protected = {}
+cfprotected = {}
+while x < len(n7.normprofile):
+    ##loop through the normprofile and take any normalized values above what we consider "protected"
+    if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+        protected[n7.nts[x]] = n7.normprofile[x]
+    x += 1
+
+
+positives = {}
+negatives = {}
+for i in sasa:
+        ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+        if sasa[i] >= sasa_cut:
+            negatives[i] = sasa[i]
+        ##if measured area is lower than our cutoff it is protected and therefore "positive"
+        if sasa[i] < sasa_cut:
+            positives[i] =  sasa[i]
+
+TP = 0
+FP = 0
+TN = 0
+FN = 0
+for i in sasa:
+    ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+    if i in protected and i in positives:
+        TP += 1
+    if i in protected and i in negatives:
+        FP += 1
+    if i in positives and i not in protected:
+        FN += 1
+    if i in negatives and i not in protected:
+        TN += 1
+
+print(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+print('Sensitivity: {}\n'.format(TP/(TP+FN)))
+print('Specificity: {}\n'.format(TN/(TN+FP)))
+print('PPV: {}\n'.format(TP/(TP+FP)))
+print('NPV: {}\n'.format(TN/(TN+FN)))
+print('Diagnostic Accuracy: {}'.format((TP + TN)/(TP + FP + FN + TN))) 
+print('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+print(len(sasa))
+print(TP+FP+TN+FN)
+
diff --git a/msDMS_Saleem&Miller_2025/3b_final.py b/msDMS_Saleem&Miller_2025/3b_final.py
new file mode 100644
index 0000000..51e1989
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3b_final.py
@@ -0,0 +1,80 @@
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--real', required=True, help='path to profile')
+parser.add_argument('--random', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--random2', required=True, help='2nd random dist')
+parser.add_argument('--real2', required=True, help='2nd real dist')
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def parse_infile(input_f):
+    nts = {}
+    with open(input_f) as inp:
+        for i,line in enumerate(inp):
+            if i == 0:
+                continue
+            else:
+                spl = line.split()
+                if spl[2] == '0.0':
+                    continue
+                else:
+                    nts[int(spl[0])] = int(spl[1])
+    return nts
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within 20A')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+random_counts = parse_infile(args.random)
+counts = parse_infile(args.real)
+random_counts2 = parse_infile(args.random2)
+counts2 = parse_infile(args.real2)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/3d_calcs_final.py b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
new file mode 100644
index 0000000..944f202
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_prot_nts(fname):
+    sizes = []
+    with open(fname) as inp:
+        for line in inp:
+            sizes.append(int(line.strip()))
+    return sizes
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    return p_locus
+
+def sample_fold(size_l,loci_nts, rna):
+    rna_copy = copy.deepcopy(rna)
+    length = len(rna_copy)
+    percent_cover = []
+    encapsulated = 0
+    selected = {}
+    for i in size_l:
+        nts_in = []
+        selected[i] = set()
+        while len(selected[i]) == 0:
+            prospective = set()
+            start = random.choice(rna_copy)
+            end = start + i
+            if end > length:
+                continue
+            for j in range(start, end + 1):
+                prospective.add(j)
+
+
+            if prospective.issubset(set(rna_copy)):
+                selected[i] = prospective
+        for j in selected[i]:
+            rna_copy.remove(j)
+            if j in loci_nts:
+                nts_in.append(j)
+        cover = len(nts_in) / len(selected[i])
+        percent_cover.append(cover)
+        if cover == 1.0:
+            encapsulated += 1
+
+    return encapsulated
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0,1,2,3,4,5,6,7,8,9,10],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=6, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('# of SFD encapsulated')
+    ax.set_xticks(range(10))
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+if __name__ == '__main__':
+    #1542 16s
+    #2904 23s
+    prot_nts = read_prot_nts('23s_selffoldingnts.txt')
+    prim_nts = read_primary_nts('23s_primarynts.txt')
+    #prot_nts = read_prot_nts('16s_selffoldingnts.txt')
+    #prim_nts = read_primary_nts('16s_primarynts.txt')
+    ssu = []
+    lsu = []
+    
+    #for i in range(1, 1542 + 1):
+    #    ssu.append(i)
+    
+    for i in range(1, 2904 + 1):
+        ssu.append(i)
+
+    #lsu_en = []
+    ssu_en = []
+
+    while len(ssu_en) < 10000:
+        ssu_en.append(sample_fold(prot_nts, prim_nts, ssu))
+
+    hit = []
+    for i in ssu_en:
+        if i == 6:
+            hit.append(i)
+    print(len(hit)/ len(ssu_en))
+    histogram(ssu_en, title='23s', name='23s_10k_sfd_hist.pdf')
+    
diff --git a/msDMS_Saleem&Miller_2025/4c+e.py b/msDMS_Saleem&Miller_2025/4c+e.py
new file mode 100644
index 0000000..d625780
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/4c+e.py
@@ -0,0 +1,367 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+import matplotlib as mpl
+import matplotlib.pyplot as plot
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] ='Arial'
+
+import sys, os, math
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+parser.add_argument('--inputRings', required=True, help='path to ringfile')
+parser.add_argument('--adjustFrame', type=int, default=0, help='adjust frame of profile to match crystal structure if necessary')
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ringType', choices=('N1', 'N1N7', 'N7'), required=True, help='type of rings to plot')
+parser.add_argument('--crystalStart', type=int, required=True, help='first position of the RNA chain')
+parser.add_argument('--crystalEnd', type=int, required=True, help='last position of the RNA chain')
+parser.add_argument('--outputName', required=True, help='Output prefix for pymol session')
+parser.add_argument('--contactFilter', type=int, default=0, help='contact filter distance')
+parser.add_argument('--concatenated', action='store_true', help='plot all types of rings')
+parser.add_argument('--filterNegative', action='store_true', help='filter negative rings')
+parser.add_argument('--ctFile', required=True, help='path to ct file')
+parser.add_argument('--APCcutoff', type=int, default=2, help='filter rings with a score lower than or equal to this number')
+parser.add_argument('--approx', action='store_true', help='draw ring to closest nt if nt is missing from crystal structure')
+parser.add_argument('--renderall', action='store_true', help='render all the rings if a total concat file is passed')
+parser.add_argument('--distanceFilter', type=int, default=99999, help='check average contact distance of things within filter(angstroms)')
+args = parser.parse_args()
+
+
+
+'''
+tool for parsing ring data and drawing it onto a crystal structure
+
+takes a profile, ringfile
+takes a chain of interest for a crystal structure
+takes first and last residue numbers of that chain to get things in frame
+takes a frameshift integer
+'''
+
+cmd.load(args.pdbfile)
+
+profile = rprofile(args.profile)
+
+rings = []
+residues = []
+
+def adjust_frame(profile, ring_list, difference):
+    profile.nts = [x + difference for x in profile.nts]
+    for i in ring_list:
+        
+        i[0] += difference
+        i[1] += difference
+
+
+def parse_ringmap(cutoff, name='', filterneg=False):
+    rings = []
+    with open(name) as inp:
+        for p, line in enumerate(inp):
+            if p == 0 or p == 1:
+                continue
+            else:
+                split = line.split()
+                #split[2] for sig split[9] for alpha
+                if float(split[9]) <= cutoff:
+                    continue
+                    
+                if filterneg:
+                    if split[3] == '-1':
+                        
+                        continue
+                rings.append([int(split[0]), int(split[1]), float(split[9])])
+    return rings
+
+def contact_filter(inrings, filter_distance=0, ctfile=None):
+    
+    rings = []
+    CT1 = None
+    if ctfile is not None:
+        CT1 = RNAtools.CT(ctfile)
+    
+    for i in inrings:
+            #print(CT1.contactDistance(i[0], i[1]))
+            #print(str(i[0]) + ' ' + str(i[1]))
+            if CT1.contactDistance(i[0], i[1]) <= filter_distance:
+                #print('filtered')
+                continue
+            else:
+                
+                i.append(CT1.contactDistance(i[0],i[1]))
+                print("contact filter add {}".format(i))
+                rings.append(i)
+
+    return rings
+
+def split_concat(concat_list, profile):
+    last =  profile.nts[-1]
+    n1_rings = []
+    n1n7_rings = []
+    n7_rings = []
+    for i in concat_list:
+        if i[0] > last and i[1] > last:
+            i[0] -= last
+            i[1] -= last
+            n7_rings.append(i)
+        elif i[0] < last and i[1] < last:
+            n1_rings.append(i)
+        else:
+            n1n7_rings.append(i)
+            if i[0] > last:
+                i[0] -= last
+            else:
+                i[1] -= last
+    print(len(n1_rings), len(n1n7_rings), len(n7_rings))
+    return(n1_rings, n1n7_rings, n7_rings)
+
+def closest(lst, K):
+     
+    return lst[min(range(len(lst)), key = lambda i: abs(lst[i]-K))]
+
+
+def draw(ring_list, chain, crystalstart, crystalend, approx, ringtype, render=False, distancefilter=0):
+    distances = []
+    scores = []
+
+    
+    #get all the residues in a chain. some crystal structures are missing residues in the middle
+    
+    dchain = 'chain {}'.format(chain)
+    cmd.iterate(dchain,'residues.append(int(resi))')
+    fresidues = [*set(residues)]
+    #print('residues {}'.format(fresidues))
+    
+    
+    if ringtype == 'N1':
+        colors = [(44,123,182), (44,123,182), (171,217,233), (255,255,255), (253,174,97), (215,25,28), (215,25,28)]
+    else:
+        colors = [(91, 218, 190), (91, 218, 190), (0, 95, 154), (255, 255, 255), (218, 91, 172), (83, 48, 95), (83, 48, 95)]
+
+    #change to (20,100) for sig
+    cutoffs = [-1e10, -5, -2, 0, 2, 5, 1e10]
+    #alpha = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
+    gradiant = [False, True, False, False, True, False]
+
+    for v,i in enumerate(ring_list):
+        linestart = i[0]
+        startstr = 'start_{}'.format(v+1)
+        endstr = 'end_{}'.format(v+1)
+        lineend = i[1]
+
+        
+        if linestart not in fresidues:
+            if approx:
+                linestart = closest(fresidues, linestart)
+            else:
+                continue
+        if lineend not in fresidues:
+            if approx:
+                lineend = closest(fresidues, lineend)
+            else:
+                continue
+
+        if linestart  <= crystalstart or lineend <= crystalstart:
+            
+            continue
+        elif linestart >= crystalend or lineend >= crystalend:
+            
+            continue
+
+        else:
+            distance = cmd.get_distance(atom1='chain {} and resi {} and name P'.format(chain, linestart), atom2='chain {} and resi {} and name P'.format(chain, lineend))
+            
+            if distance < distancefilter:
+                try:
+                    print(i)
+                    scores.append(i[3] / distance)
+                except:
+                    print('FAIL at ring {}'.format(i))
+            
+
+            distances.append(distance)
+            
+            for ind in range(len(cutoffs) - 1):
+                if cutoffs[ind] < i[2] <= cutoffs[ind + 1]:
+                    if gradiant[ind]:
+                        calccolor = colorGrad(i[2], colors[ind], colors[ind + 1], cutoffs[ind], cutoffs[ind + 1])
+                    else:
+                        calccolor = colors[ind]
+
+            cmd.set_color("color {}".format(v+1),calccolor)
+
+            cmd.select(startstr, 'chain {} and resi {} and name P'.format(chain, linestart))
+            start = cmd.get_coords('chain {} and resi {} and name P'.format(chain, linestart), 1)
+            
+            cmd.pseudoatom('points', name='start_{}'.format(v+1), pos=tuple(start[0]), color="color {}".format(v+1))
+        
+            cmd.deselect()
+        
+            cmd.select(endstr, 'chain {} and resi {} and name P'.format(chain, lineend))
+            end = cmd.get_coords('end_{}'.format(v+1), 1)
+            cmd.pseudoatom('points', name='end_{}'.format(v+1), pos=tuple(end[0]), color="color {}".format(v+1))
+
+            cmd.deselect()
+            if render:
+                print('rendering rings')
+                cmd.bond('points and name start_{}'.format(v+1), 'points and name end_{}'.format(v+1))
+
+    print("scores {}".format(scores))
+    try:
+        print("average score {}".format(sum(scores)/len(scores)))
+    except:
+        print('no rings in category')
+    return distances
+
+
+def filter_unprot_pos(ring_list, gaprofile):
+    '''
+    should be run after adjust_frame if applicable
+    '''
+
+    normvalues = {}
+    for i, v in enumerate(gaprofile.nts):
+        if np.isfinite(gaprofile.normprofile[i]):
+            normvalues[v] =  gaprofile.normprofile[i]
+
+    for i in ring_list:
+        if i[0] in normvalues and normvalues[i[0]] <= 2:
+            ring_list.remove(i)
+        if i[1] in normvalues and normvalues[i[1]] <= 2:
+            ring_list.remove(i)
+
+    return ring_list
+
+
+def filter_sig(ring_list, cutoff):
+    for i in ring_list:
+        if float(i[3]) < cutoff:
+            ring_list.remove(i)
+    return ring_list
+
+def filter_unconcat(concat_list, unconcat_list):
+    print('concat length {}'.format(len(concat_list)))
+    num_matches = 0
+    
+    non_matches = []
+    original = concat_list.copy()
+    for i in unconcat_list:
+        if i in original:
+            num_matches += 1
+            concat_list.remove(i)
+            
+        elif i not in original:
+            non_matches.append(i)
+    
+
+    for i in non_matches:
+        i[0] += 1
+    for i in non_matches:
+        if i in concat_list:
+            num_matches += 1
+    print(concat_list, '\n')
+    print(non_matches)
+    print('{} matches'.format(num_matches))
+
+    print(len(unconcat_list))
+    print("{} filtered length".format(len(concat_list)))
+    return concat_list
+
+
+
+def dist_histogram(dist_list, bins=10, title='',name='dist_hist.pdf'):
+    fig, ax = plot.subplots()
+    label = ''
+    colors = ['blue', 'orange', 'green']
+    for i, v in enumerate(dist_list):
+        if len(v) <= 0:
+            continue
+        if i == 0:
+            label = "N7"
+        elif i == 1:
+            label = 'N1'
+        elif i == 2:
+            label = "N7"
+        
+        ax.hist(v, histtype = 'step', bins = bins, label=label)
+        ax.axvline(x=np.percentile(v, [50]), color=colors[i],label=label + " median " + str(np.percentile(v, [50])))
+        ax.axvline(x=np.mean(v), color=colors[i],label=label + " mean " + str(np.mean(v)))
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('P-P Distance A')
+    ax.set_xlim(0, 70)
+    
+    #ax.set_ylim(20,150)
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def colorGrad(value, colorMin, colorMax, minValue, maxValue, log=False):
+        """ return an interpolated rgb color value """
+        
+        if log:
+            value = 10**-value
+            minValue = 10**-minValue
+            maxValue = 10**-maxValue
+
+        if value > maxValue:
+            return colorMax
+        elif value < minValue:
+            return colorMin
+        else:
+            v = value - min(maxValue, minValue)
+            v /= abs(maxValue-minValue)
+        
+            col = []
+            for i in range(3):
+                col.append( v*(colorMax[i] - colorMin[i]) + colorMin[i] )
+            
+            return col
+    
+
+rings = parse_ringmap(args.APCcutoff, args.inputRings, args.filterNegative)
+N1_rings, N1N7_rings, N7_rings = split_concat(rings, profile)
+
+if args.concatenated:
+    args.ringType= 'N7'
+    N7_rings.extend(N1N7_rings)
+    #N1_rings.extend(N7_rings)
+    pass
+
+rings_toplot = []
+if args.ringType == 'N7':
+    rings_toplot = N7_rings.copy()
+elif args.ringType == 'N1N7':
+    rings_toplot = N1N7_rings.copy()
+else:
+    rings_toplot = N1_rings.copy()
+
+print('precf {}'.format(rings_toplot))
+if args.contactFilter > 0:
+    rings_toplot = contact_filter(rings_toplot, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1_rings =  contact_filter(N1_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1N7_rings = contact_filter(N1N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N7_rings =  contact_filter(N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+
+
+
+
+adjust_frame(profile, rings_toplot, args.adjustFrame)
+dist_data = []
+dist_data1 = draw(rings_toplot, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=True)
+dist_data2 = draw(N1_rings, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=False)
+dist_data.append(dist_data1)
+dist_data.append(dist_data2)
+
+dist_histogram(dist_data, bins=np.linspace(0,50,10), title='{} {}cf filterneg={} rings'.format(args.inputRings, args.contactFilter, str(args.filterNegative)), name='{} {}cf filterneg={} rings.pdf'.format(args.inputRings, args.contactFilter, str(args.filterNegative)))
+plot.close('all')
+cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/Figure_1B_final.py b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
new file mode 100644
index 0000000..188bfef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
@@ -0,0 +1,42 @@
+import argparse
+from ReactivityProfile import ReactivityProfile as rprofile
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--ssu_old')
+argparser.add_argument('--ssu_new')
+argparser.add_argument('--lsu_old')
+argparser.add_argument('--lsu_new')
+argparse.add_argument('--out')
+args = argparser.parse_args()
+small_old = rprofile(args.ssu_old)
+large_old = rprofile(args.lsu_old)
+small_new = rprofile(args.ssu_new)
+large_new = rprofile(args.lsu_new)
+
+
+old = [small_old.rawprofile[52], large_old.rawprofile[72]]
+new = [small_new.rawprofile[52],large_new.rawprofile[72]]
+
+
+xaxis = ['16s', '23s']
+barcount = 2
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,new, width, label='new')
+ax.bar(ind+width , old, width, label='old')
+ax.set_xticks(ind + width/2, ('16s', '23s'))
+
+
+ax.set_title('m7g reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SI1_C_final.py b/msDMS_Saleem&Miller_2025/SI1_C_final.py
new file mode 100644
index 0000000..ec7bf1f
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SI1_C_final.py
@@ -0,0 +1,46 @@
+import sys
+from ReactivityProfile import ReactivityProfile
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+mpl.use('Agg')
+mpl.rcParams['font.family'] = 'sans-serif'
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prof1 = ReactivityProfile(args.prof1)
+prof2 = ReactivityProfile(args.prof2)
+
+fig, ax = plt.subplots()
+
+ax.step(prof1.nts, (prof1.rawprofile * 100), label=args.prof1, where='mid', linewidth=2, linestyle='-')
+ax.step(prof1.nts, (prof2.rawprofile * 100), label=args.prof2, where='mid', linewidth=2)
+
+#52 for ssu 72 for lsu
+seq =  [str(i) for i in prof1.sequence]
+#seq[72] = 'm7G'
+seq[52] = 'm7G'
+
+ax.set_xlabel('Nucleotide', fontsize=20)
+ax.set_xticks(prof1.nts)
+ax.set_xticklabels(seq)
+
+#ax.set_xlim([65,82])
+ax.set_xlim([44,64])
+
+
+ax.set_ylabel('Mutation rate(%)', fontsize=20)
+ax.set_ylim([0, 25])
+
+ax.legend()
+fig.tight_layout()
+
+fig.savefig(args.out)
+
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
new file mode 100644
index 0000000..3186c5a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
@@ -0,0 +1,108 @@
+from ReactivityProfile import ReactivityProfile as rp
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+from matplotlib.ticker import AutoMinorLocator
+
+def get_data(listofprofs, subunit):
+    m7gs_mn = []
+    bgs_mn = []
+    if subunit == 'large':
+        x = 72
+    else:
+        print('smallsub')
+        x = 52
+    for i in listofprofs:
+        print(i.rawprofile)
+        m7gs_mn.append(i.rawprofile[x])
+        allreactivities = 0
+        for j in i.rawprofile:
+            if np.isfinite(j):
+                allreactivities += j
+        allreactivities -= i.rawprofile[x]
+        bg = allreactivities / (len(i.rawprofile) - 1)
+        bgs_mn.append(bg)
+    return m7gs_mn, bgs_mn
+
+
+Original_16s = rp('ph83_16s_16sm7_profile.txt')
+half_mn_16s = rp('05mM_16sm7_profile.txt')
+two_mn_16s =rp('2mM_16sm7_profile.txt')
+two5_mn_16s = rp('25ph83vs2ph83_16sm7_profile.txt')
+three_mn_16s = rp('3mM_16sm7_profile.txt')
+
+
+ph75_16s = rp('ph75_16sm7_profile.txt')
+ph8_16s = rp('ph8_16sm7_profile.txt')
+ph85_16s = rp('ph85_16sm7_profile.txt')
+finalcondition_16s = rp('2mmph9_16sm7_profile.txt')
+
+Original_23s = rp('ph83_23sm7_profile.txt')
+half_mn_23s = rp('05mM_23sm7_profile.txt')
+two_mn_23s = rp('2mM_23sm7_profile.txt')
+two5_mn_23s = rp('25mM_23sm7_profile.txt')
+three_mn_23s = rp('3mM_23sm7_profile.txt')
+
+ph75_23s = rp('ph75_23sm7_profile.txt')
+ph8_23s = rp('ph8_23sm7_profile.txt')
+ph85_23s = rp('ph85_16sm7_profile.txt')
+finalcondition_23s = rp('2mmph9_23sm7_profile.txt')
+
+ssu_manganese_m7gs, ssu_manganese_bgs = get_data([half_mn_16s, Original_16s, two_mn_16s, two5_mn_16s, three_mn_16s], '')
+ssu_ph_m7gs, ssu_ph_bgs = get_data([ph75_16s, ph8_16s, Original_16s, ph85_16s], '')
+ssu_final_m7g, ssu_final_bg = get_data([finalcondition_16s], '')
+
+lsu_manganese_m7gs, lsu_manganese_bgs = get_data([half_mn_23s, Original_23s, two_mn_23s, two5_mn_23s, three_mn_23s], 'large')
+lsu_ph_m7gs, lsu_ph_bgs = get_data([ph75_23s, ph8_23s, Original_23s, ph85_23s], 'large')
+lsu_final_m7g, lsu_final_bg = get_data([finalcondition_23s], 'large')
+
+
+manganese_m7gs = [np.mean(i) for i in zip(ssu_manganese_m7gs,lsu_manganese_m7gs)]
+ph_m7gs = [np.mean(i) for i in zip(ssu_ph_m7gs,lsu_ph_m7gs)]
+final_m7g = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_m7g)]
+
+
+manganese_bgs = [np.mean(i) for i in zip(ssu_manganese_bgs,lsu_manganese_bgs)]
+ph_bgs = [np.mean(i) for i in zip(ssu_ph_bgs,lsu_ph_bgs)]
+final_bg = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_bg)]
+
+
+#add in empty data so final condition can be included in plot
+ph_m7gs.append(0.0)
+ph_bgs.append(0.0)
+
+labels_1 = ['0.5', '1', '2', '2.5', '3']
+labels_2 = ['7.5', '8.0', '8.3', '8.5', '9']
+labels_3 = ['2mM Mn pH 8.3', '1mM Mn pH 9', '2mM Mn pH 9']
+fig,ax = plot.subplots(1, 4, figsize=(10,2))
+
+#ax[2] = ax[0].twinx()
+#share yaxis
+ax[0].set_title('Manganese Concentration')
+ax[0].scatter(labels_1, manganese_m7gs)
+ax[0].plot(labels_1, manganese_m7gs)
+#add bg data
+ax[0].scatter(labels_1, manganese_bgs)
+ax[0].scatter(labels_1, [0.0,0.0,final_m7g[0],0.0,0.0])
+ax[0].plot(labels_1, manganese_bgs)
+ax[0].set_yscale('log', base=10)
+
+
+ax[1].set_title('Acidity')
+ax[1].scatter(labels_2, ph_m7gs)
+ax[1].plot(labels_2, ph_m7gs)
+#add bg data
+ax[1].scatter(labels_2, ph_bgs)
+ax[1].plot(labels_2, ph_bgs)
+ax[1].scatter(labels_2, [0.0, 0.0 ,0.0 ,0.0, final_m7g[0]])
+ax[1].set_yscale('log', base=10)
+#ax[3] = ax[1].twinx()
+
+
+
+
+fig.tight_layout()
+plot.savefig('SIFigure_1A_averages.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
new file mode 100644
index 0000000..461632e
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plt
+
+#df = pd.read_csv('2mmph9_Modified_16sm7_mutation_counts.txt',sep='\t',header=(0))
+df = pd.read_csv('ph9_Modified_23sm7_mutation_counts.txt',sep='\t',header=(0))
+
+#23s
+m7g = df.iloc[72]
+#16s
+#m7g = df.iloc[52]
+
+
+labels = 'GA', 'Other'
+data = [m7g['GA'], m7g['GT'] + m7g['GC'] + m7g['complex_insertion'] + m7g['complex_deletion'] + m7g['G_multinuc_mismatch']]
+
+fig, ax = plt.subplots()
+ax.pie(data, labels=labels, autopct='%1.1f%%')
+
+print(m7g['GA'])
+
+fig.tight_layout()
+plt.savefig('SIFigure_1C_23sfinalRT.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/figure1c_final.py b/msDMS_Saleem&Miller_2025/figure1c_final.py
new file mode 100644
index 0000000..c18bd38
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/figure1c_final.py
@@ -0,0 +1,35 @@
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sat')
+argparser.add_argument('--unsat')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+satTPP = rprofile(args.sat)
+unsatTPP = rprofile(args.unsat)
+
+fig, ax = plot.subplots()
+
+
+#manual bg sub
+unsat_man_bg = np.nan_to_num(unsatTPP.rawprofile[18:120]) - np.nan_to_num(unsatTPP.backprofile[18:120])
+sat_man_bg = np.nan_to_num(satTPP.rawprofile[18:120]) - np.nan_to_num(satTPP.backprofile[18:120])
+
+
+ax.step(satTPP.nts[18:120], unsat_man_bg, label='Unsaturated', where='mid')
+ax.step(satTPP.nts[18:120], sat_man_bg, label='Saturated', where='mid')
+
+
+
+ax.set_title('N7 reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/random_ringsample.py b/msDMS_Saleem&Miller_2025/random_ringsample.py
new file mode 100644
index 0000000..c97733a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/random_ringsample.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_ring(input_rings):
+    length = 0
+    n1rings = []
+    n7rings = []
+    with open(input_rings) as ringin:
+        
+        for i,line in enumerate(ringin):
+            if i > 1:
+                spl = line.split()
+                n1 = int(spl[0])
+                n2 = int(spl[1])
+                a = float(spl[9])
+                #print(a)
+                if a < 2:
+                    continue
+                else:
+                    if n1 <= length and n2 <= length: #filter n1 rings
+                        n1rings.append((n1,n2))
+                    else:
+                        n7rings.append((n1,n2))
+            else:
+                if i == 0:
+                    length = int(line.split()[0])
+
+    #print(n1rings)
+    return length,n1rings, n7rings
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    print(p_locus)
+    return p_locus
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=.84, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('% rings between tertiary sites')
+
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def sample_fold(rings,loci_nts, rna):
+    encapsulated = 0
+    for i in rings:
+        rna_copy = copy.deepcopy(rna)
+        nt1 = random.choice(rna_copy)
+        rna_copy.remove(nt1)
+        if nt1 - 1 in rna_copy:
+            rna_copy.remove(nt1 - 1)
+        if nt1 + 1 in rna_copy:
+            rna_copy.remove(nt1 + 1)
+        nt2 = random.choice(rna_copy)
+        #print(nt1, nt2)
+        if nt1 in loci_nts and nt2 in loci_nts:
+            encapsulated += 1
+    return encapsulated / len(rings)
+
+if __name__ == '__main__':
+    # tpp_tertnts.txt p546_tertnts.txt
+    loci = read_primary_nts('p546_tertnts.txt')
+
+    # p546_r2r3_intersect_shared_corrbuffer.txt satTPP_intersect_shared_corrbuffer.txt
+    rna_len,n1,n7 = read_ring('p546_r2r3_intersect_shared_corrbuffer.txt')
+
+    rna_nts = []
+    for i in range(1, rna_len + 1):
+        rna_nts.append(i)
+
+    samples = []
+    while len(samples) < 10000:
+        samples.append(sample_fold(n1, loci, rna_nts))
+    print(samples)
+
+    
+    hit = []
+    for i in samples:
+        if i >= 0.83: # 
+            hit.append(i)
+    print(len(hit)/ len(samples))
+    histogram(samples, title='P546', name='P546_10k_ringsample_hist.pdf')
+   
diff --git a/msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt b/msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt
new file mode 100644
index 0000000..e69de29
diff --git a/msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt b/msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt
new file mode 100644
index 0000000..e69de29
diff --git a/msDMS_Saleem&Miller_2025/si2_d+e_final.py b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
new file mode 100644
index 0000000..df692f5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
@@ -0,0 +1,67 @@
+from json.tool import main
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plot
+import sys
+import RNAtools2 as RNAtools
+import numpy as np
+from sklearn.metrics import roc_curve, roc_auc_score
+from ReactivityProfile import ReactivityProfile
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+import argparse
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+profobj = ReactivityProfile(args.prof1)
+prof2obj = ReactivityProfile(args.prof2)
+ctobj = RNAtools.CT(args.ctfile)
+pairedlist = ctobj.pairedResidueList(False)
+
+def getNtLists(profile, pairedlist, ntype):
+    
+        react = []
+        ispaired = []
+        for i,v in enumerate(profile.subprofile):               
+            if v > -10 and profile.sequence[i] == ntype:
+                react.append(v)
+                ispaired.append( int((i+1) in pairedlist) )
+            
+        print(react)
+        return react, ispaired
+
+
+labels = ('A', 'C', 'G', 'U')
+scores = []
+scores2 = []
+
+fig,ax = plot.subplots(1, 4, figsize=(8,2))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(profobj, pairedlist, nt)
+    scores.append(roc_auc_score(p,r))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(prof2obj, pairedlist, nt)
+    scores2.append(roc_auc_score(p,r))
+
+
+fig,ax = plot.subplots()
+
+barcount = 4
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,scores, width, label='new')
+ax.bar(ind+width , scores2, width, label='old')
+ax.set_xticks(ind + width/2, labels)
+
+
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_g_final.py b/msDMS_Saleem&Miller_2025/si2_g_final.py
new file mode 100644
index 0000000..719f634
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_g_final.py
@@ -0,0 +1,60 @@
+import re
+import sys
+import argparse
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as pyplot
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7profile')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+n7 = rprofile(args.n7profile)
+
+bgprof = n7.backprofile
+rawprof = n7.rawprofile
+bgsub = n7.subprofile
+normprof = n7.normprofile
+
+react, seq = [],[]
+x=0
+
+
+while x < (len(n7.sequence)):
+    seq.append(n7.sequence[x])
+    react.append(n7.subprofile[x])
+    x += 1
+
+
+ct = RNAtools.CT(args.ctfile)
+pairs = ct.pairedResidueList()
+
+s,r = [],[]
+
+# go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
+for i,v in enumerate(seq):
+    if v == 'G' and np.isfinite(react[i]):
+        r.append(react[i])
+        if i in pairs:
+            s.append(True)
+        else:
+            s.append(False)
+
+
+# compute ROC curve and plot, along with AUC
+tmp = roc_curve(s, r, drop_intermediate=False)
+pyplot.plot(tmp[0], tmp[1], label='Basepairing: {:.2f}'.format(roc_auc_score(s,r)))
+    
+
+pyplot.plot([0, 1], [0, 1], linestyle="--")
+pyplot.xlim([0.0, 1.0])
+pyplot.ylim([0.0, 1.05])
+pyplot.title('')
+pyplot.legend()
+
+pyplot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_h+i_final.py b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
new file mode 100644
index 0000000..450bf1c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
@@ -0,0 +1,238 @@
+import pandas as pd
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import scipy.stats
+import math
+import itertools
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1', required=True)
+argparser.add_argument('--prof2')
+argparser.add_argument('--out', required=True)
+args = argparser.parse_args()
+
+
+# read in the N7 reactivity file
+n7 = rprofile(args.prof1)
+n72 = rprofile(args.prof2)
+
+
+
+def reactivityByneighbors(self, add=None, GA=False, Triplet=False, pos=1, byNT=False, name=''):
+    '''
+    Takes a reactivity profile as an argument.
+    Plots reactivity rates based on neighbors. 
+    Pass a negative number to look upstream, positive for downstream. 
+    Will group purines and pyrimidines by default.
+    '''
+
+    def adjacent_values(vals, q1, q3):
+        upper_adjacent_value = q3 + (q3 - q1) * 1.5
+        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
+
+        lower_adjacent_value = q1 - (q3 - q1) * 1.5
+        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
+        return lower_adjacent_value, upper_adjacent_value
+
+
+    def set_axis_style(ax, labels):
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_xlim(0.25, len(labels) + 0.75)
+        ax.set_xlabel('Sample name')
+
+    if add is not None:
+        self.subprofile = np.concatenate([self.subprofile, add.subprofile])
+        self.sequence = np.concatenate([self.sequence, add.sequence])
+
+    def stars(p):
+    ##get stars for annotating plots using a p value
+        if p < 0.000001:
+            return '******'
+        elif p < 0.00001:
+            return '*****'
+        elif p < 0.0001:
+            return "****"
+        elif (p < 0.001):
+            return "***"
+        elif (p < 0.01):
+            return "**"
+        elif (p < 0.05):
+            return "*"
+        else:
+            return "-"
+
+    if Triplet:
+            nts = ['A','G','U','C']
+            _triplets = {}
+            for i in nts:
+                for j in nts:
+                    for k in nts:
+                        _triplets[i + j + k] = []
+                
+            for n in nts:
+                for i, v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        pre = self.sequence[i - 1]
+                        nxt = self.sequence[i + 1]
+                        _triplets[pre + n + nxt].append(v)
+
+
+    elif not GA:
+            '''
+            Data is ordered (A) purines, pyrimidines then (G) purines, pyrimidines, etc.
+            
+            '''
+
+
+            nts = ['A','G','U','C']
+            data = []
+            labels = []
+            for n in nts:
+                A = []
+                G = []
+                T = []
+                C = []
+                purines = []
+                pyrimidine = []
+                for i,v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        if byNT:
+                            if self.sequence[i + pos] == 'A':
+                                A.append(v)
+                            elif self.sequence[i + pos] == 'G':
+                                G.append(v)
+                            elif self.sequence[i + pos] == 'U':
+                                T.append(v)
+                            else:
+                                C.append(v)
+                        else:
+                            if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                                purines.append(v)
+                            else:
+                                pyrimidine.append(v)
+                if byNT:
+                    data.append(A.copy())
+                    data.append(G.copy())
+                    data.append(T.copy())
+                    data.append(C.copy())
+                    labels.append(str(n) + ' A')
+                    labels.append(str(n) + ' G')
+                    labels.append(str(n) + ' T')
+                    labels.append(str(n) + ' C')
+                else:
+                    data.append(purines.copy())
+                    data.append(pyrimidine.copy())
+                    labels.append(str(n) + ' Purines')
+                    labels.append(str(n) + ' Pyrimadines')
+
+    else:
+        if byNT:
+            A = []
+            G = []
+            T = []
+            C = []
+
+            for i, v in enumerate(self.subprofile):
+                if np.isfinite(v):
+                    if self.sequence[i + pos] == 'A':
+                        A.append(v)
+                    elif self.sequence[i + pos] == 'G':
+                        G.append(v)
+                    elif self.sequence[i + pos] == 'U':
+                        T.append(v)
+                    else:
+                        C.append(v)
+            labels = ['A','G','U','C']
+
+            data = [A,G,T,C]
+
+
+        else:
+                purines = []
+                pyrimidine = []
+                print('GA')
+                for i, v in enumerate(self.normprofile):
+                    if np.isfinite(v):
+                        if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                            purines.append(v)
+                        else:
+                            pyrimidine.append(v)
+
+                labels = ['purines', 'pyrimidines']
+                data = [purines, pyrimidine]
+    if not Triplet:
+        if GA:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(6,8))
+        elif not GA and not byNT:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(8,16))
+        else:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(30,30))
+        quants = []
+        for i in data:
+            quants.append([.25,.75])
+
+
+        ax[0].hist(data[0], histtype = 'step', label='Purines')
+        ax[0].hist(data[1], histtype = 'step', label='Pyrimidines')
+        ax[0].legend()
+
+        for i in data:
+            quantiles = np.quantile(i, np.array([ 0.75]))
+            print(quantiles)
+
+        tabledata = [['Comparison', 'P value']]
+        for i,j in itertools.combinations(data, 2):
+
+                u_stat, p = scipy.stats.mannwhitneyu(i, j, alternative='two-sided')
+
+                p_value = p
+                print(p_value)
+                print('{}   {}'.format(data.index(i), data.index(j)))
+
+                #get max and min for putting the annotation in the right place
+                y_max = np.max(np.concatenate((i, j)))
+                y_min = np.min(np.concatenate((i, j)))
+
+                tabledata.append([labels[data.index(i)] + ' vs. ' + labels[data.index(j)], str(p_value)])
+
+            
+        ax[1].table(cellText=tabledata, loc='center' )
+
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+    else:
+        out = open('{}.txt'.format(name), 'w')
+        out.write('triplet\tmean_rep1\tmedian_rep1\ttriplet_count_rep1\n')
+        quants = []
+        labels = []
+        data = []
+        for i in _triplets:
+            print(i)
+            try:
+                quantiles = np.quantile(_triplets[i], np.array([0.5]))
+                mean = np.mean(_triplets[i])
+                out.write('{}\t{:.6}\t{:.6}\t{}\n'.format(i, mean, quantiles[0], len(_triplets[i])))
+                labels.append(i)
+                data.append(_triplets[i])
+                quants.append([.25,.75])
+            except:
+                continue
+
+        fig, ax = plot.subplots(nrows = 1, ncols = 1, figsize=(30,30))
+
+        ax.violinplot(data, showmedians=True, quantiles=quants)
+
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_ylim(0,0.25)
+        ax.set_ylabel('BG sub reactivity')
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+
+
+reactivityByneighbors(n7, add=n72, GA=True, byNT=False, Triplet=False, pos=1, name=args.out)
diff --git a/msDMS_Saleem&Miller_2025/si3_ac_calcs.py b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
new file mode 100644
index 0000000..c7c6603
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
@@ -0,0 +1,96 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import glob
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sasa')
+argparser.add_argument('--subunit', choices=['16', '23'])
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prot_cut = 1.6
+
+sasa_cut = 1
+
+infile = args.sasa
+
+
+out = open(args.out,'w')
+
+adjustframe = 0
+def adjust_frame(profile, difference):
+    profile.nts = [x + difference for x in profile.nts]
+
+#match end half of profile names
+false_p = []
+false_out = open('{}_false_negatives.txt'.format(sasa.split('.')[0]), 'w')
+for file in glob.glob("*ec{}S_profile.txtga".format(args.subunit)):
+    print(file)
+    n7 = rprofile(file)
+    adjust_frame(n7,adjustframe)
+
+    # read in the sasa file output from pymol script
+    sasa = {}
+    with open(infile) as inp:
+        for line in inp:
+            spl = line.split()
+            sasa[int(spl[0])] = float(spl[1])
+
+    x = 0
+    protected = {}
+    cfprotected = {}
+    while x < len(n7.normprofile):
+        ##loop through the normprofile and take any normalized values above what we consider "protected"
+        if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+            protected[n7.nts[x]] = n7.normprofile[x]
+
+        x += 1
+
+
+    positives = {}
+    negatives = {}
+    for i in sasa:
+            ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+            if sasa[i] >= sasa_cut:
+                negatives[i] = sasa[i]
+            ##if measured area is lower than our cutoff it is protected and therefore "positive"
+            if sasa[i] < sasa_cut:
+                positives[i] =  sasa[i]
+
+    TP = 0
+    FP = 0
+    TN = 0
+    FN = 0
+    for i in sasa:
+        ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+        if i in protected and i in positives:
+            TP += 1
+        if i in protected and i in negatives:
+            FP += 1
+            
+        if i in positives and i not in protected:
+            FN += 1
+            false_p.append(str(i))
+        if i in negatives and i not in protected:
+            TN += 1
+
+    out.write(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+    out.write('Sensitivity: {}\n'.format(TP/(TP+FN)))
+    out.write('Specificity: {}\n'.format(TN/(TN+FP)))
+    out.write('PPV: {}\n'.format(TP/(TP+FP)))
+    out.write('NPV: {}\n'.format(TN/(TN+FN)))
+    out.write('Diagnostic Accuracy: {}\n'.format((TP + TN)/(TP + FP + FN + TN))) 
+    out.write('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+    out.write(str(len(sasa)))
+    out.write('\n')
+    out.write(str(TP+FP+TN+FN))
+    out.write('\n')
+
+print(len(set(false_p)))
+for nt in set(false_p):
+    false_out.write(nt+' ')
+
+out.close()
+
diff --git a/msDMS_Saleem&Miller_2025/si3_f_final.py b/msDMS_Saleem&Miller_2025/si3_f_final.py
new file mode 100644
index 0000000..1b1232c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_f_final.py
@@ -0,0 +1,69 @@
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+from pymol import cmd
+
+def get_nts_profile(profile):
+    nt_list = []
+    prof = rprofile(profile)
+    for i,v in enumerate(prof.normprofile):
+        if np.isfinite(v) and v >= 1.6:
+            nt_list.append(i + 1)
+    return nt_list
+
+def adjust_frame(number, listnts):
+    adjustednts = []
+    for i in listnts:
+        adjustednts.append(i+number)
+    return adjustednts
+
+
+
+def get_nts(input_file):
+    nts = []
+    with open(input_file, 'r') as inp:
+        for i,line in enumerate(inp):
+            if i == 0 or i == 1:
+                continue
+            spl= line.split()
+            nts.append(int(spl[0]))
+    return nts
+
+def get_nt_list(inputfile):
+    with open(inputfile, 'r') as inp:
+        for line in inp:
+            nts = line.split()
+    return nts
+
+
+def color_nts(chain, nt_list):
+    cmd.set_color('protected', (83, 48, 95))
+    for G in nt_list:
+        try:
+            cmd.select('protected_nts', 'chain {} and resi {}'.format(chain, G), merge=1)
+            cmd.color('protected', 'chain {} and resi {}'.format(chain, G))
+        except:
+            print('Nt {} not in structure'.format(G))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+    parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+    parser.add_argument('--input', required=True, help='path to infile')
+    parser.add_argument('--outputName', required=True)
+    parser.add_argument('--adjustframe', type=int)
+    parser.add_argument('--profile', action="store_true")
+    args = parser.parse_args()
+    cmd.load(args.pdbfile)
+    #to_color = get_nts(args.input)
+    if not args.profile:
+        to_color = get_nt_list(args.input)
+    else:
+        to_color = get_nts_profile(args.input)
+    if args.adjustframe is not None:
+        to_color = adjust_frame(args.adjustframe,to_color)
+
+    color_nts(args.chain, to_color)
+
+    cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si6_a_final.py b/msDMS_Saleem&Miller_2025/si6_a_final.py
new file mode 100644
index 0000000..b23d8b5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si6_a_final.py
@@ -0,0 +1,99 @@
+
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ctfile', required=True)
+parser.add_argument('--profile2', required=True, help='path to profile')
+parser.add_argument('--ctfile2', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--cutoff', type=int, default=3)
+parser.add_argument('--filterdistance', type=int, default=10)
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                #print('filtered')
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True, quantiles=quants)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within CF 5')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+prof = rprofile(args.profile)
+CT = RNAtools.CT(args.ctfile)
+gs = []
+prot = []
+random_prot = []
+for i,v in enumerate(prof.normprofile):
+    if np.isfinite(v):
+        gs.append(i+1)
+    if v >= args.cutoff:
+        prot.append(i +1)
+
+random_prot = random.sample(gs, k=len(prot))
+print(prot)
+print(random_prot)
+random_counts = contact_filter(random_prot, CT, args.filterdistance)
+counts = contact_filter(prot, CT, args.filterdistance)
+
+prof2 = rprofile(args.profile2)
+CT2 = RNAtools.CT(args.ctfile2)
+gs2 = []
+prot2 = []
+random_prot2 = []
+for i,v in enumerate(prof2.normprofile):
+    if np.isfinite(v):
+        gs2.append(i+1)
+    if v >= args.cutoff:
+        prot2.append(i +1)
+
+random_prot2 = random.sample(gs2, k=len(prot2))
+print(prot2)
+print(random_prot2)
+random_counts2 = contact_filter(random_prot2, CT2, args.filterdistance)
+counts2 = contact_filter(prot2, CT2, args.filterdistance)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
+

From 2d2399319a7454e9887566ea728a36339a55fc8a Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Thu, 4 Dec 2025 16:42:13 -0600
Subject: [PATCH 2/8] Create README.md

---
 msDMS_Saleem&Miller_2025/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 msDMS_Saleem&Miller_2025/README.md

diff --git a/msDMS_Saleem&Miller_2025/README.md b/msDMS_Saleem&Miller_2025/README.md
new file mode 100644
index 0000000..fd70836
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/README.md
@@ -0,0 +1 @@
+Code that is not a part of StructureAnalysisTools relevant to the figures of msDMS-MaP(2025).

From dce2d11eaf393f6fe35a83131633b6de39a593ab Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Thu, 4 Dec 2025 16:42:33 -0600
Subject: [PATCH 3/8] Update README.md

---
 msDMS_Saleem&Miller_2025/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/msDMS_Saleem&Miller_2025/README.md b/msDMS_Saleem&Miller_2025/README.md
index fd70836..a5d4f3e 100644
--- a/msDMS_Saleem&Miller_2025/README.md
+++ b/msDMS_Saleem&Miller_2025/README.md
@@ -1 +1 @@
-Code that is not a part of StructureAnalysisTools relevant to the figures of msDMS-MaP(2025).
+Code that is not a part of StructureAnalysisTools and relevant to the figures of msDMS-MaP(2025).

From fec949e1b06588656b02366e2f728b51ff32dcd0 Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Thu, 4 Dec 2025 16:44:04 -0600
Subject: [PATCH 4/8] Delete msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt

---
 msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt

diff --git a/msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt b/msDMS_Saleem&Miller_2025/si1_e_is_corrcode.txt
deleted file mode 100644
index e69de29..0000000

From 8ead1d78af715c8cc8cc8b6d6b619385812e7077 Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Thu, 4 Dec 2025 16:44:22 -0600
Subject: [PATCH 5/8] Delete msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt

---
 msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt

diff --git a/msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt b/msDMS_Saleem&Miller_2025/si2_abcf_is corrcode.txt
deleted file mode 100644
index e69de29..0000000

From 5525af5ca5cd0f1b163126c0e39516bc4d94bae2 Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Fri, 5 Dec 2025 14:23:22 -0600
Subject: [PATCH 6/8] Delete msDMS_Saleem&Miller_2025 directory

---
 msDMS_Saleem&Miller_2025/2b+c_final.py        | 133 -------
 msDMS_Saleem&Miller_2025/2d_final.py          |  75 ----
 msDMS_Saleem&Miller_2025/3b_final.py          |  80 ----
 msDMS_Saleem&Miller_2025/3d_calcs_final.py    | 103 -----
 msDMS_Saleem&Miller_2025/4c+e.py              | 367 ------------------
 msDMS_Saleem&Miller_2025/Figure_1B_final.py   |  42 --
 msDMS_Saleem&Miller_2025/README.md            |   1 -
 msDMS_Saleem&Miller_2025/SI1_C_final.py       |  46 ---
 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py   | 108 ------
 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py   |  27 --
 msDMS_Saleem&Miller_2025/figure1c_final.py    |  35 --
 msDMS_Saleem&Miller_2025/random_ringsample.py | 103 -----
 msDMS_Saleem&Miller_2025/si2_d+e_final.py     |  67 ----
 msDMS_Saleem&Miller_2025/si2_g_final.py       |  60 ---
 msDMS_Saleem&Miller_2025/si2_h+i_final.py     | 238 ------------
 msDMS_Saleem&Miller_2025/si3_ac_calcs.py      |  96 -----
 msDMS_Saleem&Miller_2025/si3_f_final.py       |  69 ----
 msDMS_Saleem&Miller_2025/si6_a_final.py       |  99 -----
 18 files changed, 1749 deletions(-)
 delete mode 100644 msDMS_Saleem&Miller_2025/2b+c_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/2d_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/3b_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/3d_calcs_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/4c+e.py
 delete mode 100644 msDMS_Saleem&Miller_2025/Figure_1B_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/README.md
 delete mode 100644 msDMS_Saleem&Miller_2025/SI1_C_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/figure1c_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/random_ringsample.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si2_d+e_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si2_g_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si2_h+i_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si3_ac_calcs.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si3_f_final.py
 delete mode 100644 msDMS_Saleem&Miller_2025/si6_a_final.py

diff --git a/msDMS_Saleem&Miller_2025/2b+c_final.py b/msDMS_Saleem&Miller_2025/2b+c_final.py
deleted file mode 100644
index f90b8ef..0000000
--- a/msDMS_Saleem&Miller_2025/2b+c_final.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from ReactivityProfile import ReactivityProfile as rprof
-import matplotlib.pyplot as plot
-import numpy as np
-import argparse
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-
-def reactivityByNt(self, resnums = None, nts=None, name = None):
-        """ return a list of reactivities for a given set of nts (array), or nt type"""
-        
-        pro = self.profile(name)
-        
-        mask = np.isfinite(pro)
-        #with np.errstate(invalid='ignore'):
-        #    mask = mask & (pro > -0.3) & (pro < 4.0)
-
-        try:
-            ntit = iter(nts)
-            ntmask = (self.sequence == next(ntit))
-            for n in ntit:
-                ntmask = ntmask | (self.sequence == n) 
-            
-            mask = mask & ntmask
-        except TypeError:
-            pass
-
-        try:
-            resnums = set(resnums)
-            mask = mask & np.array([i in resnums for i in self.nts])
-        except TypeError:
-            pass
-
-        return pro[mask]
-
-
-def mutHistogram(self, name = None,name_2 = None, name_3=None, nts = None, resnums = None,
-                     bins=100,axes = None, pool=False, writename='mutHist.pdf', **kwargs):
-        
-
-        
-        write = False
-        if axes is None:
-            fig, axes = plot.subplots()
-            write = True
-
-        if pool:
-            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-            rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-            rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-            rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-            
-            bins=np.histogram(np.hstack((rxn,rxn_2,rxn_3)), bins=(np.arange(0.0, 10)))[1]
-            
-            axes.hist(rxn, histtype = 'step', bins = bins, label='InCell', **kwargs)
-            
-            axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
-            
-            axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
-            axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
-        
-        else:
-        
-            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))        
-            axes.hist(rxn, histtype = 'step', bins = bins, label='In Cell', **kwargs)
-
-            if name is not None:
-
-                rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-
-                axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
-        
-            if name_2 is not None:
-
-                rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-
-                axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
-            if name_3 is not None:
-
-                rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
-
-                axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
-
-
-        
-
-        axes.legend(loc='upper left')
-        axes.set_ylabel('Count')
-        axes.set_xlabel('Norm N7 reactivity')
-        axes.set_xlim(-3.0, 3.0)
-        axes.set_ylim(0,350)
-        axes.yaxis.set_ticks(np.arange(0, 375, 25))
-        axes.set_title('23s')
-
-        if write:
-            plot.savefig(writename)
-
-
-
-
-def winsorize(prof):
-    for i, v in enumerate(prof):
-        if prof[i] > 3:
-            prof[i] = 3
-        if prof[i] < -3:
-            prof[i] = -3
-
-    return prof
-
-
-if __name__ == '__main__':
-    ntorder = ('A','C','G','U')
-
-    masknt = ('A', 'C', 'U')
-    argparser = argparse.ArgumentParser()
-    argparser.add_argument('--incell')
-    argparser.add_argument('--cf')
-    argparser.add_argument('--cfnomg')
-    argparser.add_argument('--urea')
-    args = argparser.parse_args()
-    #read in reactivity profiles
-    incell = rprof(args.incell)
-    cellfree = rprof(args.cf)
-    cf_nomg = rprof(args.cfnomg)
-    urea = rprof(args.urea)
-
-    incell.normprofile = winsorize(incell.normprofile)
-    cellfree.normprofile = winsorize(cellfree.normprofile)
-    urea.normprofile = winsorize(urea.normprofile)
-    cf_nomg.normprofile= winsorize(cf_nomg.normprofile)
-
-    #pass in incell first, then denatured
-    mutHistogram(incell,cellfree, urea,cf_nomg, bins=np.linspace(-3.0, 3.0, num=25),writename ='Fig2B_23s_InCellvsCFvsUrea_norm_top_08052024.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/2d_final.py b/msDMS_Saleem&Miller_2025/2d_final.py
deleted file mode 100644
index 31e4df4..0000000
--- a/msDMS_Saleem&Miller_2025/2d_final.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from sklearn.metrics import roc_curve, roc_auc_score
-import matplotlib.pyplot as plot
-import numpy as np
-import argparse
-
-from ReactivityProfile import ReactivityProfile as rprofile
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--n7prof')
-argparser.add_argumnet('--sasa')
-args = argparser.parse_args()
-
-
-
-n7 = rprofile(args.n7prof)
-
-infile = args.sasa
-
-
-# read in the sasa file output from pymol script
-sasa = {}
-with open(infile) as inp:
-    for line in inp:
-        spl = line.split()
-        sasa[int(spl[0])] = float(spl[1])
-
-
-prot_cut = 1.6
-sasa_cut = 1
-
-x = 0
-protected = {}
-cfprotected = {}
-while x < len(n7.normprofile):
-    ##loop through the normprofile and take any normalized values above what we consider "protected"
-    if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
-        protected[n7.nts[x]] = n7.normprofile[x]
-    x += 1
-
-
-positives = {}
-negatives = {}
-for i in sasa:
-        ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
-        if sasa[i] >= sasa_cut:
-            negatives[i] = sasa[i]
-        ##if measured area is lower than our cutoff it is protected and therefore "positive"
-        if sasa[i] < sasa_cut:
-            positives[i] =  sasa[i]
-
-TP = 0
-FP = 0
-TN = 0
-FN = 0
-for i in sasa:
-    ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
-    if i in protected and i in positives:
-        TP += 1
-    if i in protected and i in negatives:
-        FP += 1
-    if i in positives and i not in protected:
-        FN += 1
-    if i in negatives and i not in protected:
-        TN += 1
-
-print(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
-print('Sensitivity: {}\n'.format(TP/(TP+FN)))
-print('Specificity: {}\n'.format(TN/(TN+FP)))
-print('PPV: {}\n'.format(TP/(TP+FP)))
-print('NPV: {}\n'.format(TN/(TN+FN)))
-print('Diagnostic Accuracy: {}'.format((TP + TN)/(TP + FP + FN + TN))) 
-print('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
-print(len(sasa))
-print(TP+FP+TN+FN)
-
diff --git a/msDMS_Saleem&Miller_2025/3b_final.py b/msDMS_Saleem&Miller_2025/3b_final.py
deleted file mode 100644
index 51e1989..0000000
--- a/msDMS_Saleem&Miller_2025/3b_final.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import random
-import numpy as np
-from ReactivityProfile import ReactivityProfile as rprofile
-import RNAStructureObjects as RNAtools
-
-import matplotlib.pyplot as plot
-import scipy.stats
-
-import sys, os, math
-import argparse
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--real', required=True, help='path to profile')
-parser.add_argument('--random', required=True)
-parser.add_argument('--output', required=True, help='output prefix')
-parser.add_argument('--random2', required=True, help='2nd random dist')
-parser.add_argument('--real2', required=True, help='2nd real dist')
-args = parser.parse_args()
-
-
-def contact_filter(protections,CT1, filter_distance=0):
-    
-    num_within_range = {}
-    for i in protections:
-        count = 0
-        for j in protections:
-            if i == j:
-                continue
-            if CT1.contactDistance(i, j) > filter_distance:
-                
-                continue
-            else:
-                count += 1
-        num_within_range[i] = count
-    return num_within_range
-
-def parse_infile(input_f):
-    nts = {}
-    with open(input_f) as inp:
-        for i,line in enumerate(inp):
-            if i == 0:
-                continue
-            else:
-                spl = line.split()
-                if spl[2] == '0.0':
-                    continue
-                else:
-                    nts[int(spl[0])] = int(spl[1])
-    return nts
-
-def violin_plot(name, labels, data, p):
-    quants = []
-    for i in data:
-        quants.append([.25,.75])
-
-    fig, ax = plot.subplots(nrows = 1, ncols = 1)
-    ax.violinplot(data, showmedians=True)
-    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
-    #ax.set_ylim(0, 40)
-    ax.set_ylabel('number of protections within 20A')
-    ax.set_title('P = {}'.format(p))
-    fig.tight_layout(pad = 2)
-    fig.savefig(name + '.pdf')
-
-
-random_counts = parse_infile(args.random)
-counts = parse_infile(args.real)
-random_counts2 = parse_infile(args.random2)
-counts2 = parse_infile(args.real2)
-
-x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
-data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
-u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
-u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
-print(p)
-print(p2)
-
-violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/3d_calcs_final.py b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
deleted file mode 100644
index 944f202..0000000
--- a/msDMS_Saleem&Miller_2025/3d_calcs_final.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import argparse
-import matplotlib.pyplot as plot
-import numpy as np
-import random
-import copy
-
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-def read_prot_nts(fname):
-    sizes = []
-    with open(fname) as inp:
-        for line in inp:
-            sizes.append(int(line.strip()))
-    return sizes
-
-def read_primary_nts(fname):
-    p_locus = []
-    with open(fname) as inp:
-        for line in inp:
-            spl = line.split('-')
-            for i in range(int(spl[0]), int(spl[1]) + 1):
-                p_locus.append(i)
-    return p_locus
-
-def sample_fold(size_l,loci_nts, rna):
-    rna_copy = copy.deepcopy(rna)
-    length = len(rna_copy)
-    percent_cover = []
-    encapsulated = 0
-    selected = {}
-    for i in size_l:
-        nts_in = []
-        selected[i] = set()
-        while len(selected[i]) == 0:
-            prospective = set()
-            start = random.choice(rna_copy)
-            end = start + i
-            if end > length:
-                continue
-            for j in range(start, end + 1):
-                prospective.add(j)
-
-
-            if prospective.issubset(set(rna_copy)):
-                selected[i] = prospective
-        for j in selected[i]:
-            rna_copy.remove(j)
-            if j in loci_nts:
-                nts_in.append(j)
-        cover = len(nts_in) / len(selected[i])
-        percent_cover.append(cover)
-        if cover == 1.0:
-            encapsulated += 1
-
-    return encapsulated
-
-def histogram(en, label = '', title='',name='sfd_hist.pdf'):
-    fig, ax = plot.subplots()
-    ax.hist(en, histtype = 'bar', bins=[0,1,2,3,4,5,6,7,8,9,10],label=label, align='left')
-    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
-    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
-    ax.axvline(x=6, label='Observed')
-
-    
-    ax.set_ylabel('Count')
-    ax.set_xlabel('# of SFD encapsulated')
-    ax.set_xticks(range(10))
-    
-    ax.set_title(title)
-    ax.legend(loc='upper right')
-    fig.tight_layout()
-    plot.savefig(name)
-
-if __name__ == '__main__':
-    #1542 16s
-    #2904 23s
-    prot_nts = read_prot_nts('23s_selffoldingnts.txt')
-    prim_nts = read_primary_nts('23s_primarynts.txt')
-    #prot_nts = read_prot_nts('16s_selffoldingnts.txt')
-    #prim_nts = read_primary_nts('16s_primarynts.txt')
-    ssu = []
-    lsu = []
-    
-    #for i in range(1, 1542 + 1):
-    #    ssu.append(i)
-    
-    for i in range(1, 2904 + 1):
-        ssu.append(i)
-
-    #lsu_en = []
-    ssu_en = []
-
-    while len(ssu_en) < 10000:
-        ssu_en.append(sample_fold(prot_nts, prim_nts, ssu))
-
-    hit = []
-    for i in ssu_en:
-        if i == 6:
-            hit.append(i)
-    print(len(hit)/ len(ssu_en))
-    histogram(ssu_en, title='23s', name='23s_10k_sfd_hist.pdf')
-    
diff --git a/msDMS_Saleem&Miller_2025/4c+e.py b/msDMS_Saleem&Miller_2025/4c+e.py
deleted file mode 100644
index d625780..0000000
--- a/msDMS_Saleem&Miller_2025/4c+e.py
+++ /dev/null
@@ -1,367 +0,0 @@
-from sklearn.metrics import roc_curve, roc_auc_score
-
-import numpy as np
-
-from ReactivityProfile import ReactivityProfile as rprofile
-import RNAStructureObjects as RNAtools
-import matplotlib as mpl
-import matplotlib.pyplot as plot
-mpl.use('Agg')
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] ='Arial'
-
-import sys, os, math
-import argparse
-
-
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
-parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
-parser.add_argument('--inputRings', required=True, help='path to ringfile')
-parser.add_argument('--adjustFrame', type=int, default=0, help='adjust frame of profile to match crystal structure if necessary')
-parser.add_argument('--profile', required=True, help='path to profile')
-parser.add_argument('--ringType', choices=('N1', 'N1N7', 'N7'), required=True, help='type of rings to plot')
-parser.add_argument('--crystalStart', type=int, required=True, help='first position of the RNA chain')
-parser.add_argument('--crystalEnd', type=int, required=True, help='last position of the RNA chain')
-parser.add_argument('--outputName', required=True, help='Output prefix for pymol session')
-parser.add_argument('--contactFilter', type=int, default=0, help='contact filter distance')
-parser.add_argument('--concatenated', action='store_true', help='plot all types of rings')
-parser.add_argument('--filterNegative', action='store_true', help='filter negative rings')
-parser.add_argument('--ctFile', required=True, help='path to ct file')
-parser.add_argument('--APCcutoff', type=int, default=2, help='filter rings with a score lower than or equal to this number')
-parser.add_argument('--approx', action='store_true', help='draw ring to closest nt if nt is missing from crystal structure')
-parser.add_argument('--renderall', action='store_true', help='render all the rings if a total concat file is passed')
-parser.add_argument('--distanceFilter', type=int, default=99999, help='check average contact distance of things within filter(angstroms)')
-args = parser.parse_args()
-
-
-
-'''
-tool for parsing ring data and drawing it onto a crystal structure
-
-takes a profile, ringfile
-takes a chain of interest for a crystal structure
-takes first and last residue numbers of that chain to get things in frame
-takes a frameshift integer
-'''
-
-cmd.load(args.pdbfile)
-
-profile = rprofile(args.profile)
-
-rings = []
-residues = []
-
-def adjust_frame(profile, ring_list, difference):
-    profile.nts = [x + difference for x in profile.nts]
-    for i in ring_list:
-        
-        i[0] += difference
-        i[1] += difference
-
-
-def parse_ringmap(cutoff, name='', filterneg=False):
-    rings = []
-    with open(name) as inp:
-        for p, line in enumerate(inp):
-            if p == 0 or p == 1:
-                continue
-            else:
-                split = line.split()
-                #split[2] for sig split[9] for alpha
-                if float(split[9]) <= cutoff:
-                    continue
-                    
-                if filterneg:
-                    if split[3] == '-1':
-                        
-                        continue
-                rings.append([int(split[0]), int(split[1]), float(split[9])])
-    return rings
-
-def contact_filter(inrings, filter_distance=0, ctfile=None):
-    
-    rings = []
-    CT1 = None
-    if ctfile is not None:
-        CT1 = RNAtools.CT(ctfile)
-    
-    for i in inrings:
-            #print(CT1.contactDistance(i[0], i[1]))
-            #print(str(i[0]) + ' ' + str(i[1]))
-            if CT1.contactDistance(i[0], i[1]) <= filter_distance:
-                #print('filtered')
-                continue
-            else:
-                
-                i.append(CT1.contactDistance(i[0],i[1]))
-                print("contact filter add {}".format(i))
-                rings.append(i)
-
-    return rings
-
-def split_concat(concat_list, profile):
-    last =  profile.nts[-1]
-    n1_rings = []
-    n1n7_rings = []
-    n7_rings = []
-    for i in concat_list:
-        if i[0] > last and i[1] > last:
-            i[0] -= last
-            i[1] -= last
-            n7_rings.append(i)
-        elif i[0] < last and i[1] < last:
-            n1_rings.append(i)
-        else:
-            n1n7_rings.append(i)
-            if i[0] > last:
-                i[0] -= last
-            else:
-                i[1] -= last
-    print(len(n1_rings), len(n1n7_rings), len(n7_rings))
-    return(n1_rings, n1n7_rings, n7_rings)
-
-def closest(lst, K):
-     
-    return lst[min(range(len(lst)), key = lambda i: abs(lst[i]-K))]
-
-
-def draw(ring_list, chain, crystalstart, crystalend, approx, ringtype, render=False, distancefilter=0):
-    distances = []
-    scores = []
-
-    
-    #get all the residues in a chain. some crystal structures are missing residues in the middle
-    
-    dchain = 'chain {}'.format(chain)
-    cmd.iterate(dchain,'residues.append(int(resi))')
-    fresidues = [*set(residues)]
-    #print('residues {}'.format(fresidues))
-    
-    
-    if ringtype == 'N1':
-        colors = [(44,123,182), (44,123,182), (171,217,233), (255,255,255), (253,174,97), (215,25,28), (215,25,28)]
-    else:
-        colors = [(91, 218, 190), (91, 218, 190), (0, 95, 154), (255, 255, 255), (218, 91, 172), (83, 48, 95), (83, 48, 95)]
-
-    #change to (20,100) for sig
-    cutoffs = [-1e10, -5, -2, 0, 2, 5, 1e10]
-    #alpha = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
-    gradiant = [False, True, False, False, True, False]
-
-    for v,i in enumerate(ring_list):
-        linestart = i[0]
-        startstr = 'start_{}'.format(v+1)
-        endstr = 'end_{}'.format(v+1)
-        lineend = i[1]
-
-        
-        if linestart not in fresidues:
-            if approx:
-                linestart = closest(fresidues, linestart)
-            else:
-                continue
-        if lineend not in fresidues:
-            if approx:
-                lineend = closest(fresidues, lineend)
-            else:
-                continue
-
-        if linestart  <= crystalstart or lineend <= crystalstart:
-            
-            continue
-        elif linestart >= crystalend or lineend >= crystalend:
-            
-            continue
-
-        else:
-            distance = cmd.get_distance(atom1='chain {} and resi {} and name P'.format(chain, linestart), atom2='chain {} and resi {} and name P'.format(chain, lineend))
-            
-            if distance < distancefilter:
-                try:
-                    print(i)
-                    scores.append(i[3] / distance)
-                except:
-                    print('FAIL at ring {}'.format(i))
-            
-
-            distances.append(distance)
-            
-            for ind in range(len(cutoffs) - 1):
-                if cutoffs[ind] < i[2] <= cutoffs[ind + 1]:
-                    if gradiant[ind]:
-                        calccolor = colorGrad(i[2], colors[ind], colors[ind + 1], cutoffs[ind], cutoffs[ind + 1])
-                    else:
-                        calccolor = colors[ind]
-
-            cmd.set_color("color {}".format(v+1),calccolor)
-
-            cmd.select(startstr, 'chain {} and resi {} and name P'.format(chain, linestart))
-            start = cmd.get_coords('chain {} and resi {} and name P'.format(chain, linestart), 1)
-            
-            cmd.pseudoatom('points', name='start_{}'.format(v+1), pos=tuple(start[0]), color="color {}".format(v+1))
-        
-            cmd.deselect()
-        
-            cmd.select(endstr, 'chain {} and resi {} and name P'.format(chain, lineend))
-            end = cmd.get_coords('end_{}'.format(v+1), 1)
-            cmd.pseudoatom('points', name='end_{}'.format(v+1), pos=tuple(end[0]), color="color {}".format(v+1))
-
-            cmd.deselect()
-            if render:
-                print('rendering rings')
-                cmd.bond('points and name start_{}'.format(v+1), 'points and name end_{}'.format(v+1))
-
-    print("scores {}".format(scores))
-    try:
-        print("average score {}".format(sum(scores)/len(scores)))
-    except:
-        print('no rings in category')
-    return distances
-
-
-def filter_unprot_pos(ring_list, gaprofile):
-    '''
-    should be run after adjust_frame if applicable
-    '''
-
-    normvalues = {}
-    for i, v in enumerate(gaprofile.nts):
-        if np.isfinite(gaprofile.normprofile[i]):
-            normvalues[v] =  gaprofile.normprofile[i]
-
-    for i in ring_list:
-        if i[0] in normvalues and normvalues[i[0]] <= 2:
-            ring_list.remove(i)
-        if i[1] in normvalues and normvalues[i[1]] <= 2:
-            ring_list.remove(i)
-
-    return ring_list
-
-
-def filter_sig(ring_list, cutoff):
-    for i in ring_list:
-        if float(i[3]) < cutoff:
-            ring_list.remove(i)
-    return ring_list
-
-def filter_unconcat(concat_list, unconcat_list):
-    print('concat length {}'.format(len(concat_list)))
-    num_matches = 0
-    
-    non_matches = []
-    original = concat_list.copy()
-    for i in unconcat_list:
-        if i in original:
-            num_matches += 1
-            concat_list.remove(i)
-            
-        elif i not in original:
-            non_matches.append(i)
-    
-
-    for i in non_matches:
-        i[0] += 1
-    for i in non_matches:
-        if i in concat_list:
-            num_matches += 1
-    print(concat_list, '\n')
-    print(non_matches)
-    print('{} matches'.format(num_matches))
-
-    print(len(unconcat_list))
-    print("{} filtered length".format(len(concat_list)))
-    return concat_list
-
-
-
-def dist_histogram(dist_list, bins=10, title='',name='dist_hist.pdf'):
-    fig, ax = plot.subplots()
-    label = ''
-    colors = ['blue', 'orange', 'green']
-    for i, v in enumerate(dist_list):
-        if len(v) <= 0:
-            continue
-        if i == 0:
-            label = "N7"
-        elif i == 1:
-            label = 'N1'
-        elif i == 2:
-            label = "N7"
-        
-        ax.hist(v, histtype = 'step', bins = bins, label=label)
-        ax.axvline(x=np.percentile(v, [50]), color=colors[i],label=label + " median " + str(np.percentile(v, [50])))
-        ax.axvline(x=np.mean(v), color=colors[i],label=label + " mean " + str(np.mean(v)))
-
-    
-    ax.set_ylabel('Count')
-    ax.set_xlabel('P-P Distance A')
-    ax.set_xlim(0, 70)
-    
-    #ax.set_ylim(20,150)
-    ax.set_title(title)
-    ax.legend(loc='upper right')
-    fig.tight_layout()
-    plot.savefig(name)
-
-def colorGrad(value, colorMin, colorMax, minValue, maxValue, log=False):
-        """ return an interpolated rgb color value """
-        
-        if log:
-            value = 10**-value
-            minValue = 10**-minValue
-            maxValue = 10**-maxValue
-
-        if value > maxValue:
-            return colorMax
-        elif value < minValue:
-            return colorMin
-        else:
-            v = value - min(maxValue, minValue)
-            v /= abs(maxValue-minValue)
-        
-            col = []
-            for i in range(3):
-                col.append( v*(colorMax[i] - colorMin[i]) + colorMin[i] )
-            
-            return col
-    
-
-rings = parse_ringmap(args.APCcutoff, args.inputRings, args.filterNegative)
-N1_rings, N1N7_rings, N7_rings = split_concat(rings, profile)
-
-if args.concatenated:
-    args.ringType= 'N7'
-    N7_rings.extend(N1N7_rings)
-    #N1_rings.extend(N7_rings)
-    pass
-
-rings_toplot = []
-if args.ringType == 'N7':
-    rings_toplot = N7_rings.copy()
-elif args.ringType == 'N1N7':
-    rings_toplot = N1N7_rings.copy()
-else:
-    rings_toplot = N1_rings.copy()
-
-print('precf {}'.format(rings_toplot))
-if args.contactFilter > 0:
-    rings_toplot = contact_filter(rings_toplot, filter_distance=args.contactFilter, ctfile=args.ctFile)
-    N1_rings =  contact_filter(N1_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
-    N1N7_rings = contact_filter(N1N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
-    N7_rings =  contact_filter(N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
-
-
-
-
-adjust_frame(profile, rings_toplot, args.adjustFrame)
-dist_data = []
-dist_data1 = draw(rings_toplot, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=True)
-dist_data2 = draw(N1_rings, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=False)
-dist_data.append(dist_data1)
-dist_data.append(dist_data2)
-
-dist_histogram(dist_data, bins=np.linspace(0,50,10), title='{} {}cf filterneg={} rings'.format(args.inputRings, args.contactFilter, str(args.filterNegative)), name='{} {}cf filterneg={} rings.pdf'.format(args.inputRings, args.contactFilter, str(args.filterNegative)))
-plot.close('all')
-cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/Figure_1B_final.py b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
deleted file mode 100644
index 188bfef..0000000
--- a/msDMS_Saleem&Miller_2025/Figure_1B_final.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import argparse
-from ReactivityProfile import ReactivityProfile as rprofile
-import matplotlib as mpl
-import numpy as np
-mpl.use('Agg')
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] = 'Arial'
-from matplotlib import pyplot as plot
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--ssu_old')
-argparser.add_argument('--ssu_new')
-argparser.add_argument('--lsu_old')
-argparser.add_argument('--lsu_new')
-argparse.add_argument('--out')
-args = argparser.parse_args()
-small_old = rprofile(args.ssu_old)
-large_old = rprofile(args.lsu_old)
-small_new = rprofile(args.ssu_new)
-large_new = rprofile(args.lsu_new)
-
-
-old = [small_old.rawprofile[52], large_old.rawprofile[72]]
-new = [small_new.rawprofile[52],large_new.rawprofile[72]]
-
-
-xaxis = ['16s', '23s']
-barcount = 2
-ind = np.arange(barcount)
-width = 0.3
-
-fig, ax = plot.subplots()
-
-ax.bar(ind,new, width, label='new')
-ax.bar(ind+width , old, width, label='old')
-ax.set_xticks(ind + width/2, ('16s', '23s'))
-
-
-ax.set_title('m7g reactivities')
-ax.legend(loc='upper right')
-fig.tight_layout()
-plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/README.md b/msDMS_Saleem&Miller_2025/README.md
deleted file mode 100644
index a5d4f3e..0000000
--- a/msDMS_Saleem&Miller_2025/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Code that is not a part of StructureAnalysisTools and relevant to the figures of msDMS-MaP(2025).
diff --git a/msDMS_Saleem&Miller_2025/SI1_C_final.py b/msDMS_Saleem&Miller_2025/SI1_C_final.py
deleted file mode 100644
index ec7bf1f..0000000
--- a/msDMS_Saleem&Miller_2025/SI1_C_final.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import sys
-from ReactivityProfile import ReactivityProfile
-import matplotlib as mpl
-import matplotlib.pyplot as plt
-import numpy as np
-import argparse
-mpl.use('Agg')
-mpl.rcParams['font.family'] = 'sans-serif'
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] = 'Arial'
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--prof1')
-argparser.add_argument('--prof2')
-argparser.add_argument('--out')
-args = argparser.parse_args()
-
-prof1 = ReactivityProfile(args.prof1)
-prof2 = ReactivityProfile(args.prof2)
-
-fig, ax = plt.subplots()
-
-ax.step(prof1.nts, (prof1.rawprofile * 100), label=args.prof1, where='mid', linewidth=2, linestyle='-')
-ax.step(prof1.nts, (prof2.rawprofile * 100), label=args.prof2, where='mid', linewidth=2)
-
-#52 for ssu 72 for lsu
-seq =  [str(i) for i in prof1.sequence]
-#seq[72] = 'm7G'
-seq[52] = 'm7G'
-
-ax.set_xlabel('Nucleotide', fontsize=20)
-ax.set_xticks(prof1.nts)
-ax.set_xticklabels(seq)
-
-#ax.set_xlim([65,82])
-ax.set_xlim([44,64])
-
-
-ax.set_ylabel('Mutation rate(%)', fontsize=20)
-ax.set_ylim([0, 25])
-
-ax.legend()
-fig.tight_layout()
-
-fig.savefig(args.out)
-
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
deleted file mode 100644
index 3186c5a..0000000
--- a/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from ReactivityProfile import ReactivityProfile as rp
-import matplotlib as mpl
-import numpy as np
-mpl.use('Agg')
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] = 'Arial'
-from matplotlib import pyplot as plot
-from matplotlib.ticker import AutoMinorLocator
-
-def get_data(listofprofs, subunit):
-    m7gs_mn = []
-    bgs_mn = []
-    if subunit == 'large':
-        x = 72
-    else:
-        print('smallsub')
-        x = 52
-    for i in listofprofs:
-        print(i.rawprofile)
-        m7gs_mn.append(i.rawprofile[x])
-        allreactivities = 0
-        for j in i.rawprofile:
-            if np.isfinite(j):
-                allreactivities += j
-        allreactivities -= i.rawprofile[x]
-        bg = allreactivities / (len(i.rawprofile) - 1)
-        bgs_mn.append(bg)
-    return m7gs_mn, bgs_mn
-
-
-Original_16s = rp('ph83_16s_16sm7_profile.txt')
-half_mn_16s = rp('05mM_16sm7_profile.txt')
-two_mn_16s =rp('2mM_16sm7_profile.txt')
-two5_mn_16s = rp('25ph83vs2ph83_16sm7_profile.txt')
-three_mn_16s = rp('3mM_16sm7_profile.txt')
-
-
-ph75_16s = rp('ph75_16sm7_profile.txt')
-ph8_16s = rp('ph8_16sm7_profile.txt')
-ph85_16s = rp('ph85_16sm7_profile.txt')
-finalcondition_16s = rp('2mmph9_16sm7_profile.txt')
-
-Original_23s = rp('ph83_23sm7_profile.txt')
-half_mn_23s = rp('05mM_23sm7_profile.txt')
-two_mn_23s = rp('2mM_23sm7_profile.txt')
-two5_mn_23s = rp('25mM_23sm7_profile.txt')
-three_mn_23s = rp('3mM_23sm7_profile.txt')
-
-ph75_23s = rp('ph75_23sm7_profile.txt')
-ph8_23s = rp('ph8_23sm7_profile.txt')
-ph85_23s = rp('ph85_16sm7_profile.txt')
-finalcondition_23s = rp('2mmph9_23sm7_profile.txt')
-
-ssu_manganese_m7gs, ssu_manganese_bgs = get_data([half_mn_16s, Original_16s, two_mn_16s, two5_mn_16s, three_mn_16s], '')
-ssu_ph_m7gs, ssu_ph_bgs = get_data([ph75_16s, ph8_16s, Original_16s, ph85_16s], '')
-ssu_final_m7g, ssu_final_bg = get_data([finalcondition_16s], '')
-
-lsu_manganese_m7gs, lsu_manganese_bgs = get_data([half_mn_23s, Original_23s, two_mn_23s, two5_mn_23s, three_mn_23s], 'large')
-lsu_ph_m7gs, lsu_ph_bgs = get_data([ph75_23s, ph8_23s, Original_23s, ph85_23s], 'large')
-lsu_final_m7g, lsu_final_bg = get_data([finalcondition_23s], 'large')
-
-
-manganese_m7gs = [np.mean(i) for i in zip(ssu_manganese_m7gs,lsu_manganese_m7gs)]
-ph_m7gs = [np.mean(i) for i in zip(ssu_ph_m7gs,lsu_ph_m7gs)]
-final_m7g = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_m7g)]
-
-
-manganese_bgs = [np.mean(i) for i in zip(ssu_manganese_bgs,lsu_manganese_bgs)]
-ph_bgs = [np.mean(i) for i in zip(ssu_ph_bgs,lsu_ph_bgs)]
-final_bg = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_bg)]
-
-
-#add in empty data so final condition can be included in plot
-ph_m7gs.append(0.0)
-ph_bgs.append(0.0)
-
-labels_1 = ['0.5', '1', '2', '2.5', '3']
-labels_2 = ['7.5', '8.0', '8.3', '8.5', '9']
-labels_3 = ['2mM Mn pH 8.3', '1mM Mn pH 9', '2mM Mn pH 9']
-fig,ax = plot.subplots(1, 4, figsize=(10,2))
-
-#ax[2] = ax[0].twinx()
-#share yaxis
-ax[0].set_title('Manganese Concentration')
-ax[0].scatter(labels_1, manganese_m7gs)
-ax[0].plot(labels_1, manganese_m7gs)
-#add bg data
-ax[0].scatter(labels_1, manganese_bgs)
-ax[0].scatter(labels_1, [0.0,0.0,final_m7g[0],0.0,0.0])
-ax[0].plot(labels_1, manganese_bgs)
-ax[0].set_yscale('log', base=10)
-
-
-ax[1].set_title('Acidity')
-ax[1].scatter(labels_2, ph_m7gs)
-ax[1].plot(labels_2, ph_m7gs)
-#add bg data
-ax[1].scatter(labels_2, ph_bgs)
-ax[1].plot(labels_2, ph_bgs)
-ax[1].scatter(labels_2, [0.0, 0.0 ,0.0 ,0.0, final_m7g[0]])
-ax[1].set_yscale('log', base=10)
-#ax[3] = ax[1].twinx()
-
-
-
-
-fig.tight_layout()
-plot.savefig('SIFigure_1A_averages.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
deleted file mode 100644
index 461632e..0000000
--- a/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import pandas as pd
-import matplotlib as mpl
-import numpy as np
-mpl.use('Agg')
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] = 'Arial'
-from matplotlib import pyplot as plt
-
-#df = pd.read_csv('2mmph9_Modified_16sm7_mutation_counts.txt',sep='\t',header=(0))
-df = pd.read_csv('ph9_Modified_23sm7_mutation_counts.txt',sep='\t',header=(0))
-
-#23s
-m7g = df.iloc[72]
-#16s
-#m7g = df.iloc[52]
-
-
-labels = 'GA', 'Other'
-data = [m7g['GA'], m7g['GT'] + m7g['GC'] + m7g['complex_insertion'] + m7g['complex_deletion'] + m7g['G_multinuc_mismatch']]
-
-fig, ax = plt.subplots()
-ax.pie(data, labels=labels, autopct='%1.1f%%')
-
-print(m7g['GA'])
-
-fig.tight_layout()
-plt.savefig('SIFigure_1C_23sfinalRT.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/figure1c_final.py b/msDMS_Saleem&Miller_2025/figure1c_final.py
deleted file mode 100644
index c18bd38..0000000
--- a/msDMS_Saleem&Miller_2025/figure1c_final.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from ReactivityProfile import ReactivityProfile as rprofile
-import argparse
-import matplotlib as mpl
-import numpy as np
-mpl.use('Agg')
-mpl.rcParams['pdf.fonttype'] = 42
-mpl.rcParams['font.sans-serif'] = 'Arial'
-from matplotlib import pyplot as plot
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--sat')
-argparser.add_argument('--unsat')
-argparser.add_argument('--out')
-args = argparser.parse_args()
-
-satTPP = rprofile(args.sat)
-unsatTPP = rprofile(args.unsat)
-
-fig, ax = plot.subplots()
-
-
-#manual bg sub
-unsat_man_bg = np.nan_to_num(unsatTPP.rawprofile[18:120]) - np.nan_to_num(unsatTPP.backprofile[18:120])
-sat_man_bg = np.nan_to_num(satTPP.rawprofile[18:120]) - np.nan_to_num(satTPP.backprofile[18:120])
-
-
-ax.step(satTPP.nts[18:120], unsat_man_bg, label='Unsaturated', where='mid')
-ax.step(satTPP.nts[18:120], sat_man_bg, label='Saturated', where='mid')
-
-
-
-ax.set_title('N7 reactivities')
-ax.legend(loc='upper right')
-fig.tight_layout()
-plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/random_ringsample.py b/msDMS_Saleem&Miller_2025/random_ringsample.py
deleted file mode 100644
index c97733a..0000000
--- a/msDMS_Saleem&Miller_2025/random_ringsample.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import argparse
-import matplotlib.pyplot as plot
-import numpy as np
-import random
-import copy
-
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-def read_ring(input_rings):
-    length = 0
-    n1rings = []
-    n7rings = []
-    with open(input_rings) as ringin:
-        
-        for i,line in enumerate(ringin):
-            if i > 1:
-                spl = line.split()
-                n1 = int(spl[0])
-                n2 = int(spl[1])
-                a = float(spl[9])
-                #print(a)
-                if a < 2:
-                    continue
-                else:
-                    if n1 <= length and n2 <= length: #filter n1 rings
-                        n1rings.append((n1,n2))
-                    else:
-                        n7rings.append((n1,n2))
-            else:
-                if i == 0:
-                    length = int(line.split()[0])
-
-    #print(n1rings)
-    return length,n1rings, n7rings
-
-def read_primary_nts(fname):
-    p_locus = []
-    with open(fname) as inp:
-        for line in inp:
-            spl = line.split('-')
-            for i in range(int(spl[0]), int(spl[1]) + 1):
-                p_locus.append(i)
-    print(p_locus)
-    return p_locus
-
-def histogram(en, label = '', title='',name='sfd_hist.pdf'):
-    fig, ax = plot.subplots()
-    ax.hist(en, histtype = 'bar', bins=[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],label=label, align='left')
-    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
-    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
-    ax.axvline(x=.84, label='Observed')
-
-    
-    ax.set_ylabel('Count')
-    ax.set_xlabel('% rings between tertiary sites')
-
-    
-    ax.set_title(title)
-    ax.legend(loc='upper right')
-    fig.tight_layout()
-    plot.savefig(name)
-
-def sample_fold(rings,loci_nts, rna):
-    encapsulated = 0
-    for i in rings:
-        rna_copy = copy.deepcopy(rna)
-        nt1 = random.choice(rna_copy)
-        rna_copy.remove(nt1)
-        if nt1 - 1 in rna_copy:
-            rna_copy.remove(nt1 - 1)
-        if nt1 + 1 in rna_copy:
-            rna_copy.remove(nt1 + 1)
-        nt2 = random.choice(rna_copy)
-        #print(nt1, nt2)
-        if nt1 in loci_nts and nt2 in loci_nts:
-            encapsulated += 1
-    return encapsulated / len(rings)
-
-if __name__ == '__main__':
-    # tpp_tertnts.txt p546_tertnts.txt
-    loci = read_primary_nts('p546_tertnts.txt')
-
-    # p546_r2r3_intersect_shared_corrbuffer.txt satTPP_intersect_shared_corrbuffer.txt
-    rna_len,n1,n7 = read_ring('p546_r2r3_intersect_shared_corrbuffer.txt')
-
-    rna_nts = []
-    for i in range(1, rna_len + 1):
-        rna_nts.append(i)
-
-    samples = []
-    while len(samples) < 10000:
-        samples.append(sample_fold(n1, loci, rna_nts))
-    print(samples)
-
-    
-    hit = []
-    for i in samples:
-        if i >= 0.83: # 
-            hit.append(i)
-    print(len(hit)/ len(samples))
-    histogram(samples, title='P546', name='P546_10k_ringsample_hist.pdf')
-   
diff --git a/msDMS_Saleem&Miller_2025/si2_d+e_final.py b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
deleted file mode 100644
index df692f5..0000000
--- a/msDMS_Saleem&Miller_2025/si2_d+e_final.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from json.tool import main
-import matplotlib
-matplotlib.use('Agg')
-import matplotlib.pyplot as plot
-import sys
-import RNAtools2 as RNAtools
-import numpy as np
-from sklearn.metrics import roc_curve, roc_auc_score
-from ReactivityProfile import ReactivityProfile
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-import argparse
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--prof1')
-argparser.add_argument('--prof2')
-argparser.add_argument('--ctfile')
-argparser.add_argument('--out')
-args = argparser.parse_args()
-profobj = ReactivityProfile(args.prof1)
-prof2obj = ReactivityProfile(args.prof2)
-ctobj = RNAtools.CT(args.ctfile)
-pairedlist = ctobj.pairedResidueList(False)
-
-def getNtLists(profile, pairedlist, ntype):
-    
-        react = []
-        ispaired = []
-        for i,v in enumerate(profile.subprofile):               
-            if v > -10 and profile.sequence[i] == ntype:
-                react.append(v)
-                ispaired.append( int((i+1) in pairedlist) )
-            
-        print(react)
-        return react, ispaired
-
-
-labels = ('A', 'C', 'G', 'U')
-scores = []
-scores2 = []
-
-fig,ax = plot.subplots(1, 4, figsize=(8,2))
-
-for i,nt in enumerate(labels):
-    r, p = getNtLists(profobj, pairedlist, nt)
-    scores.append(roc_auc_score(p,r))
-
-for i,nt in enumerate(labels):
-    r, p = getNtLists(prof2obj, pairedlist, nt)
-    scores2.append(roc_auc_score(p,r))
-
-
-fig,ax = plot.subplots()
-
-barcount = 4
-ind = np.arange(barcount)
-width = 0.3
-
-fig, ax = plot.subplots()
-
-ax.bar(ind,scores, width, label='new')
-ax.bar(ind+width , scores2, width, label='old')
-ax.set_xticks(ind + width/2, labels)
-
-
-fig.tight_layout()
-plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_g_final.py b/msDMS_Saleem&Miller_2025/si2_g_final.py
deleted file mode 100644
index 719f634..0000000
--- a/msDMS_Saleem&Miller_2025/si2_g_final.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import re
-import sys
-import argparse
-from sklearn.metrics import roc_curve, roc_auc_score
-import matplotlib.pyplot as pyplot
-import numpy as np
-
-from ReactivityProfile import ReactivityProfile as rprofile
-import RNAStructureObjects as RNAtools
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--n7profile')
-argparser.add_argument('--ctfile')
-argparser.add_argument('--out')
-args = argparser.parse_args()
-
-n7 = rprofile(args.n7profile)
-
-bgprof = n7.backprofile
-rawprof = n7.rawprofile
-bgsub = n7.subprofile
-normprof = n7.normprofile
-
-react, seq = [],[]
-x=0
-
-
-while x < (len(n7.sequence)):
-    seq.append(n7.sequence[x])
-    react.append(n7.subprofile[x])
-    x += 1
-
-
-ct = RNAtools.CT(args.ctfile)
-pairs = ct.pairedResidueList()
-
-s,r = [],[]
-
-# go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
-for i,v in enumerate(seq):
-    if v == 'G' and np.isfinite(react[i]):
-        r.append(react[i])
-        if i in pairs:
-            s.append(True)
-        else:
-            s.append(False)
-
-
-# compute ROC curve and plot, along with AUC
-tmp = roc_curve(s, r, drop_intermediate=False)
-pyplot.plot(tmp[0], tmp[1], label='Basepairing: {:.2f}'.format(roc_auc_score(s,r)))
-    
-
-pyplot.plot([0, 1], [0, 1], linestyle="--")
-pyplot.xlim([0.0, 1.0])
-pyplot.ylim([0.0, 1.05])
-pyplot.title('')
-pyplot.legend()
-
-pyplot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_h+i_final.py b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
deleted file mode 100644
index 450bf1c..0000000
--- a/msDMS_Saleem&Miller_2025/si2_h+i_final.py
+++ /dev/null
@@ -1,238 +0,0 @@
-import pandas as pd
-from sklearn.metrics import roc_curve, roc_auc_score
-import matplotlib.pyplot as plot
-import numpy as np
-import scipy.stats
-import math
-import itertools
-from ReactivityProfile import ReactivityProfile as rprofile
-import argparse
-
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--prof1', required=True)
-argparser.add_argument('--prof2')
-argparser.add_argument('--out', required=True)
-args = argparser.parse_args()
-
-
-# read in the N7 reactivity file
-n7 = rprofile(args.prof1)
-n72 = rprofile(args.prof2)
-
-
-
-def reactivityByneighbors(self, add=None, GA=False, Triplet=False, pos=1, byNT=False, name=''):
-    '''
-    Takes a reactivity profile as an argument.
-    Plots reactivity rates based on neighbors. 
-    Pass a negative number to look upstream, positive for downstream. 
-    Will group purines and pyrimidines by default.
-    '''
-
-    def adjacent_values(vals, q1, q3):
-        upper_adjacent_value = q3 + (q3 - q1) * 1.5
-        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
-
-        lower_adjacent_value = q1 - (q3 - q1) * 1.5
-        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
-        return lower_adjacent_value, upper_adjacent_value
-
-
-    def set_axis_style(ax, labels):
-        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
-        ax.set_xlim(0.25, len(labels) + 0.75)
-        ax.set_xlabel('Sample name')
-
-    if add is not None:
-        self.subprofile = np.concatenate([self.subprofile, add.subprofile])
-        self.sequence = np.concatenate([self.sequence, add.sequence])
-
-    def stars(p):
-    ##get stars for annotating plots using a p value
-        if p < 0.000001:
-            return '******'
-        elif p < 0.00001:
-            return '*****'
-        elif p < 0.0001:
-            return "****"
-        elif (p < 0.001):
-            return "***"
-        elif (p < 0.01):
-            return "**"
-        elif (p < 0.05):
-            return "*"
-        else:
-            return "-"
-
-    if Triplet:
-            nts = ['A','G','U','C']
-            _triplets = {}
-            for i in nts:
-                for j in nts:
-                    for k in nts:
-                        _triplets[i + j + k] = []
-                
-            for n in nts:
-                for i, v in enumerate(self.subprofile):
-                    if np.isfinite(v) and self.sequence[i] == n:
-                        pre = self.sequence[i - 1]
-                        nxt = self.sequence[i + 1]
-                        _triplets[pre + n + nxt].append(v)
-
-
-    elif not GA:
-            '''
-            Data is ordered (A) purines, pyrimidines then (G) purines, pyrimidines, etc.
-            
-            '''
-
-
-            nts = ['A','G','U','C']
-            data = []
-            labels = []
-            for n in nts:
-                A = []
-                G = []
-                T = []
-                C = []
-                purines = []
-                pyrimidine = []
-                for i,v in enumerate(self.subprofile):
-                    if np.isfinite(v) and self.sequence[i] == n:
-                        if byNT:
-                            if self.sequence[i + pos] == 'A':
-                                A.append(v)
-                            elif self.sequence[i + pos] == 'G':
-                                G.append(v)
-                            elif self.sequence[i + pos] == 'U':
-                                T.append(v)
-                            else:
-                                C.append(v)
-                        else:
-                            if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
-                                purines.append(v)
-                            else:
-                                pyrimidine.append(v)
-                if byNT:
-                    data.append(A.copy())
-                    data.append(G.copy())
-                    data.append(T.copy())
-                    data.append(C.copy())
-                    labels.append(str(n) + ' A')
-                    labels.append(str(n) + ' G')
-                    labels.append(str(n) + ' T')
-                    labels.append(str(n) + ' C')
-                else:
-                    data.append(purines.copy())
-                    data.append(pyrimidine.copy())
-                    labels.append(str(n) + ' Purines')
-                    labels.append(str(n) + ' Pyrimadines')
-
-    else:
-        if byNT:
-            A = []
-            G = []
-            T = []
-            C = []
-
-            for i, v in enumerate(self.subprofile):
-                if np.isfinite(v):
-                    if self.sequence[i + pos] == 'A':
-                        A.append(v)
-                    elif self.sequence[i + pos] == 'G':
-                        G.append(v)
-                    elif self.sequence[i + pos] == 'U':
-                        T.append(v)
-                    else:
-                        C.append(v)
-            labels = ['A','G','U','C']
-
-            data = [A,G,T,C]
-
-
-        else:
-                purines = []
-                pyrimidine = []
-                print('GA')
-                for i, v in enumerate(self.normprofile):
-                    if np.isfinite(v):
-                        if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
-                            purines.append(v)
-                        else:
-                            pyrimidine.append(v)
-
-                labels = ['purines', 'pyrimidines']
-                data = [purines, pyrimidine]
-    if not Triplet:
-        if GA:
-            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(6,8))
-        elif not GA and not byNT:
-            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(8,16))
-        else:
-            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(30,30))
-        quants = []
-        for i in data:
-            quants.append([.25,.75])
-
-
-        ax[0].hist(data[0], histtype = 'step', label='Purines')
-        ax[0].hist(data[1], histtype = 'step', label='Pyrimidines')
-        ax[0].legend()
-
-        for i in data:
-            quantiles = np.quantile(i, np.array([ 0.75]))
-            print(quantiles)
-
-        tabledata = [['Comparison', 'P value']]
-        for i,j in itertools.combinations(data, 2):
-
-                u_stat, p = scipy.stats.mannwhitneyu(i, j, alternative='two-sided')
-
-                p_value = p
-                print(p_value)
-                print('{}   {}'.format(data.index(i), data.index(j)))
-
-                #get max and min for putting the annotation in the right place
-                y_max = np.max(np.concatenate((i, j)))
-                y_min = np.min(np.concatenate((i, j)))
-
-                tabledata.append([labels[data.index(i)] + ' vs. ' + labels[data.index(j)], str(p_value)])
-
-            
-        ax[1].table(cellText=tabledata, loc='center' )
-
-        fig.tight_layout(pad = 2)
-        fig.savefig(name + '.pdf')
-    else:
-        out = open('{}.txt'.format(name), 'w')
-        out.write('triplet\tmean_rep1\tmedian_rep1\ttriplet_count_rep1\n')
-        quants = []
-        labels = []
-        data = []
-        for i in _triplets:
-            print(i)
-            try:
-                quantiles = np.quantile(_triplets[i], np.array([0.5]))
-                mean = np.mean(_triplets[i])
-                out.write('{}\t{:.6}\t{:.6}\t{}\n'.format(i, mean, quantiles[0], len(_triplets[i])))
-                labels.append(i)
-                data.append(_triplets[i])
-                quants.append([.25,.75])
-            except:
-                continue
-
-        fig, ax = plot.subplots(nrows = 1, ncols = 1, figsize=(30,30))
-
-        ax.violinplot(data, showmedians=True, quantiles=quants)
-
-        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
-        ax.set_ylim(0,0.25)
-        ax.set_ylabel('BG sub reactivity')
-        fig.tight_layout(pad = 2)
-        fig.savefig(name + '.pdf')
-
-
-reactivityByneighbors(n7, add=n72, GA=True, byNT=False, Triplet=False, pos=1, name=args.out)
diff --git a/msDMS_Saleem&Miller_2025/si3_ac_calcs.py b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
deleted file mode 100644
index c7c6603..0000000
--- a/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import argparse
-import matplotlib.pyplot as plot
-import numpy as np
-import glob
-from ReactivityProfile import ReactivityProfile as rprofile
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument('--sasa')
-argparser.add_argument('--subunit', choices=['16', '23'])
-argparser.add_argument('--out')
-args = argparser.parse_args()
-
-prot_cut = 1.6
-
-sasa_cut = 1
-
-infile = args.sasa
-
-
-out = open(args.out,'w')
-
-adjustframe = 0
-def adjust_frame(profile, difference):
-    profile.nts = [x + difference for x in profile.nts]
-
-#match end half of profile names
-false_p = []
-false_out = open('{}_false_negatives.txt'.format(sasa.split('.')[0]), 'w')
-for file in glob.glob("*ec{}S_profile.txtga".format(args.subunit)):
-    print(file)
-    n7 = rprofile(file)
-    adjust_frame(n7,adjustframe)
-
-    # read in the sasa file output from pymol script
-    sasa = {}
-    with open(infile) as inp:
-        for line in inp:
-            spl = line.split()
-            sasa[int(spl[0])] = float(spl[1])
-
-    x = 0
-    protected = {}
-    cfprotected = {}
-    while x < len(n7.normprofile):
-        ##loop through the normprofile and take any normalized values above what we consider "protected"
-        if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
-            protected[n7.nts[x]] = n7.normprofile[x]
-
-        x += 1
-
-
-    positives = {}
-    negatives = {}
-    for i in sasa:
-            ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
-            if sasa[i] >= sasa_cut:
-                negatives[i] = sasa[i]
-            ##if measured area is lower than our cutoff it is protected and therefore "positive"
-            if sasa[i] < sasa_cut:
-                positives[i] =  sasa[i]
-
-    TP = 0
-    FP = 0
-    TN = 0
-    FN = 0
-    for i in sasa:
-        ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
-        if i in protected and i in positives:
-            TP += 1
-        if i in protected and i in negatives:
-            FP += 1
-            
-        if i in positives and i not in protected:
-            FN += 1
-            false_p.append(str(i))
-        if i in negatives and i not in protected:
-            TN += 1
-
-    out.write(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
-    out.write('Sensitivity: {}\n'.format(TP/(TP+FN)))
-    out.write('Specificity: {}\n'.format(TN/(TN+FP)))
-    out.write('PPV: {}\n'.format(TP/(TP+FP)))
-    out.write('NPV: {}\n'.format(TN/(TN+FN)))
-    out.write('Diagnostic Accuracy: {}\n'.format((TP + TN)/(TP + FP + FN + TN))) 
-    out.write('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
-    out.write(str(len(sasa)))
-    out.write('\n')
-    out.write(str(TP+FP+TN+FN))
-    out.write('\n')
-
-print(len(set(false_p)))
-for nt in set(false_p):
-    false_out.write(nt+' ')
-
-out.close()
-
diff --git a/msDMS_Saleem&Miller_2025/si3_f_final.py b/msDMS_Saleem&Miller_2025/si3_f_final.py
deleted file mode 100644
index 1b1232c..0000000
--- a/msDMS_Saleem&Miller_2025/si3_f_final.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import numpy as np
-from ReactivityProfile import ReactivityProfile as rprofile
-import argparse
-from pymol import cmd
-
-def get_nts_profile(profile):
-    nt_list = []
-    prof = rprofile(profile)
-    for i,v in enumerate(prof.normprofile):
-        if np.isfinite(v) and v >= 1.6:
-            nt_list.append(i + 1)
-    return nt_list
-
-def adjust_frame(number, listnts):
-    adjustednts = []
-    for i in listnts:
-        adjustednts.append(i+number)
-    return adjustednts
-
-
-
-def get_nts(input_file):
-    nts = []
-    with open(input_file, 'r') as inp:
-        for i,line in enumerate(inp):
-            if i == 0 or i == 1:
-                continue
-            spl= line.split()
-            nts.append(int(spl[0]))
-    return nts
-
-def get_nt_list(inputfile):
-    with open(inputfile, 'r') as inp:
-        for line in inp:
-            nts = line.split()
-    return nts
-
-
-def color_nts(chain, nt_list):
-    cmd.set_color('protected', (83, 48, 95))
-    for G in nt_list:
-        try:
-            cmd.select('protected_nts', 'chain {} and resi {}'.format(chain, G), merge=1)
-            cmd.color('protected', 'chain {} and resi {}'.format(chain, G))
-        except:
-            print('Nt {} not in structure'.format(G))
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
-    parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
-    parser.add_argument('--input', required=True, help='path to infile')
-    parser.add_argument('--outputName', required=True)
-    parser.add_argument('--adjustframe', type=int)
-    parser.add_argument('--profile', action="store_true")
-    args = parser.parse_args()
-    cmd.load(args.pdbfile)
-    #to_color = get_nts(args.input)
-    if not args.profile:
-        to_color = get_nt_list(args.input)
-    else:
-        to_color = get_nts_profile(args.input)
-    if args.adjustframe is not None:
-        to_color = adjust_frame(args.adjustframe,to_color)
-
-    color_nts(args.chain, to_color)
-
-    cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si6_a_final.py b/msDMS_Saleem&Miller_2025/si6_a_final.py
deleted file mode 100644
index b23d8b5..0000000
--- a/msDMS_Saleem&Miller_2025/si6_a_final.py
+++ /dev/null
@@ -1,99 +0,0 @@
-
-import random
-import numpy as np
-from ReactivityProfile import ReactivityProfile as rprofile
-import RNAStructureObjects as RNAtools
-
-import matplotlib.pyplot as plot
-import scipy.stats
-
-import sys, os, math
-import argparse
-plot.rcParams['pdf.fonttype'] = 42
-plot.rcParams['font.sans-serif'] = 'Arial'
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--profile', required=True, help='path to profile')
-parser.add_argument('--ctfile', required=True)
-parser.add_argument('--profile2', required=True, help='path to profile')
-parser.add_argument('--ctfile2', required=True)
-parser.add_argument('--output', required=True, help='output prefix')
-parser.add_argument('--cutoff', type=int, default=3)
-parser.add_argument('--filterdistance', type=int, default=10)
-args = parser.parse_args()
-
-
-def contact_filter(protections,CT1, filter_distance=0):
-    
-    num_within_range = {}
-    for i in protections:
-        count = 0
-        for j in protections:
-            if i == j:
-                continue
-            if CT1.contactDistance(i, j) > filter_distance:
-                #print('filtered')
-                continue
-            else:
-                count += 1
-        num_within_range[i] = count
-    return num_within_range
-
-def violin_plot(name, labels, data, p):
-    quants = []
-    for i in data:
-        quants.append([.25,.75])
-
-    fig, ax = plot.subplots(nrows = 1, ncols = 1)
-    ax.violinplot(data, showmedians=True, quantiles=quants)
-    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
-    #ax.set_ylim(0, 40)
-    ax.set_ylabel('number of protections within CF 5')
-    ax.set_title('P = {}'.format(p))
-    fig.tight_layout(pad = 2)
-    fig.savefig(name + '.pdf')
-
-
-prof = rprofile(args.profile)
-CT = RNAtools.CT(args.ctfile)
-gs = []
-prot = []
-random_prot = []
-for i,v in enumerate(prof.normprofile):
-    if np.isfinite(v):
-        gs.append(i+1)
-    if v >= args.cutoff:
-        prot.append(i +1)
-
-random_prot = random.sample(gs, k=len(prot))
-print(prot)
-print(random_prot)
-random_counts = contact_filter(random_prot, CT, args.filterdistance)
-counts = contact_filter(prot, CT, args.filterdistance)
-
-prof2 = rprofile(args.profile2)
-CT2 = RNAtools.CT(args.ctfile2)
-gs2 = []
-prot2 = []
-random_prot2 = []
-for i,v in enumerate(prof2.normprofile):
-    if np.isfinite(v):
-        gs2.append(i+1)
-    if v >= args.cutoff:
-        prot2.append(i +1)
-
-random_prot2 = random.sample(gs2, k=len(prot2))
-print(prot2)
-print(random_prot2)
-random_counts2 = contact_filter(random_prot2, CT2, args.filterdistance)
-counts2 = contact_filter(prot2, CT2, args.filterdistance)
-
-x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
-data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
-u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
-u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
-print(p)
-print(p2)
-
-violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
-

From 699c0385e0484dd8ef2bdb3a7313846f2ce336de Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Fri, 5 Dec 2025 14:24:03 -0600
Subject: [PATCH 7/8] Add files via upload

---
 msDMS_Saleem&Miller_2025/2b+c_final.py        | 133 +++++++
 msDMS_Saleem&Miller_2025/2d_final.py          |  75 ++++
 msDMS_Saleem&Miller_2025/3b_final.py          |  80 ++++
 msDMS_Saleem&Miller_2025/3d_calcs_final.py    | 103 +++++
 msDMS_Saleem&Miller_2025/4c+e.py              | 367 ++++++++++++++++++
 msDMS_Saleem&Miller_2025/Figure_1B_final.py   |  42 ++
 msDMS_Saleem&Miller_2025/SI1_C_final.py       |  46 +++
 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py   | 108 ++++++
 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py   |  27 ++
 msDMS_Saleem&Miller_2025/figure1c_final.py    |  35 ++
 msDMS_Saleem&Miller_2025/pymol_rmsd.py        |  52 +++
 msDMS_Saleem&Miller_2025/pymol_sasa.py        |  38 ++
 msDMS_Saleem&Miller_2025/random_ringsample.py | 103 +++++
 msDMS_Saleem&Miller_2025/si2_d+e_final.py     |  67 ++++
 msDMS_Saleem&Miller_2025/si2_g_final.py       |  60 +++
 msDMS_Saleem&Miller_2025/si2_h+i_final.py     | 238 ++++++++++++
 msDMS_Saleem&Miller_2025/si2_k_final.py       |  67 ++++
 msDMS_Saleem&Miller_2025/si3_ac_calcs.py      |  96 +++++
 msDMS_Saleem&Miller_2025/si3_f_final.py       |  69 ++++
 msDMS_Saleem&Miller_2025/si6_a_final.py       |  99 +++++
 20 files changed, 1905 insertions(+)
 create mode 100644 msDMS_Saleem&Miller_2025/2b+c_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/2d_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/3b_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/3d_calcs_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/4c+e.py
 create mode 100644 msDMS_Saleem&Miller_2025/Figure_1B_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SI1_C_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/figure1c_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/pymol_rmsd.py
 create mode 100644 msDMS_Saleem&Miller_2025/pymol_sasa.py
 create mode 100644 msDMS_Saleem&Miller_2025/random_ringsample.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_d+e_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_g_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_h+i_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si2_k_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si3_ac_calcs.py
 create mode 100644 msDMS_Saleem&Miller_2025/si3_f_final.py
 create mode 100644 msDMS_Saleem&Miller_2025/si6_a_final.py

diff --git a/msDMS_Saleem&Miller_2025/2b+c_final.py b/msDMS_Saleem&Miller_2025/2b+c_final.py
new file mode 100644
index 0000000..f90b8ef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2b+c_final.py
@@ -0,0 +1,133 @@
+from ReactivityProfile import ReactivityProfile as rprof
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+
+def reactivityByNt(self, resnums = None, nts=None, name = None):
+        """ return a list of reactivities for a given set of nts (array), or nt type"""
+        
+        pro = self.profile(name)
+        
+        mask = np.isfinite(pro)
+        #with np.errstate(invalid='ignore'):
+        #    mask = mask & (pro > -0.3) & (pro < 4.0)
+
+        try:
+            ntit = iter(nts)
+            ntmask = (self.sequence == next(ntit))
+            for n in ntit:
+                ntmask = ntmask | (self.sequence == n) 
+            
+            mask = mask & ntmask
+        except TypeError:
+            pass
+
+        try:
+            resnums = set(resnums)
+            mask = mask & np.array([i in resnums for i in self.nts])
+        except TypeError:
+            pass
+
+        return pro[mask]
+
+
+def mutHistogram(self, name = None,name_2 = None, name_3=None, nts = None, resnums = None,
+                     bins=100,axes = None, pool=False, writename='mutHist.pdf', **kwargs):
+        
+
+        
+        write = False
+        if axes is None:
+            fig, axes = plot.subplots()
+            write = True
+
+        if pool:
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            
+            bins=np.histogram(np.hstack((rxn,rxn_2,rxn_3)), bins=(np.arange(0.0, 10)))[1]
+            
+            axes.hist(rxn, histtype = 'step', bins = bins, label='InCell', **kwargs)
+            
+            axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+            
+            axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+        
+        else:
+        
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))        
+            axes.hist(rxn, histtype = 'step', bins = bins, label='In Cell', **kwargs)
+
+            if name is not None:
+
+                rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+        
+            if name_2 is not None:
+
+                rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            if name_3 is not None:
+
+                rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+
+
+        
+
+        axes.legend(loc='upper left')
+        axes.set_ylabel('Count')
+        axes.set_xlabel('Norm N7 reactivity')
+        axes.set_xlim(-3.0, 3.0)
+        axes.set_ylim(0,350)
+        axes.yaxis.set_ticks(np.arange(0, 375, 25))
+        axes.set_title('23s')
+
+        if write:
+            plot.savefig(writename)
+
+
+
+
+def winsorize(prof):
+    for i, v in enumerate(prof):
+        if prof[i] > 3:
+            prof[i] = 3
+        if prof[i] < -3:
+            prof[i] = -3
+
+    return prof
+
+
+if __name__ == '__main__':
+    ntorder = ('A','C','G','U')
+
+    masknt = ('A', 'C', 'U')
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('--incell')
+    argparser.add_argument('--cf')
+    argparser.add_argument('--cfnomg')
+    argparser.add_argument('--urea')
+    args = argparser.parse_args()
+    #read in reactivity profiles
+    incell = rprof(args.incell)
+    cellfree = rprof(args.cf)
+    cf_nomg = rprof(args.cfnomg)
+    urea = rprof(args.urea)
+
+    incell.normprofile = winsorize(incell.normprofile)
+    cellfree.normprofile = winsorize(cellfree.normprofile)
+    urea.normprofile = winsorize(urea.normprofile)
+    cf_nomg.normprofile= winsorize(cf_nomg.normprofile)
+
+    #pass in incell first, then denatured
+    mutHistogram(incell,cellfree, urea,cf_nomg, bins=np.linspace(-3.0, 3.0, num=25),writename ='Fig2B_23s_InCellvsCFvsUrea_norm_top_08052024.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/2d_final.py b/msDMS_Saleem&Miller_2025/2d_final.py
new file mode 100644
index 0000000..31e4df4
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2d_final.py
@@ -0,0 +1,75 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7prof')
+argparser.add_argumnet('--sasa')
+args = argparser.parse_args()
+
+
+
+n7 = rprofile(args.n7prof)
+
+infile = args.sasa
+
+
+# read in the sasa file output from pymol script
+sasa = {}
+with open(infile) as inp:
+    for line in inp:
+        spl = line.split()
+        sasa[int(spl[0])] = float(spl[1])
+
+
+prot_cut = 1.6
+sasa_cut = 1
+
+x = 0
+protected = {}
+cfprotected = {}
+while x < len(n7.normprofile):
+    ##loop through the normprofile and take any normalized values above what we consider "protected"
+    if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+        protected[n7.nts[x]] = n7.normprofile[x]
+    x += 1
+
+
+positives = {}
+negatives = {}
+for i in sasa:
+        ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+        if sasa[i] >= sasa_cut:
+            negatives[i] = sasa[i]
+        ##if measured area is lower than our cutoff it is protected and therefore "positive"
+        if sasa[i] < sasa_cut:
+            positives[i] =  sasa[i]
+
+TP = 0
+FP = 0
+TN = 0
+FN = 0
+for i in sasa:
+    ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+    if i in protected and i in positives:
+        TP += 1
+    if i in protected and i in negatives:
+        FP += 1
+    if i in positives and i not in protected:
+        FN += 1
+    if i in negatives and i not in protected:
+        TN += 1
+
+print(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+print('Sensitivity: {}\n'.format(TP/(TP+FN)))
+print('Specificity: {}\n'.format(TN/(TN+FP)))
+print('PPV: {}\n'.format(TP/(TP+FP)))
+print('NPV: {}\n'.format(TN/(TN+FN)))
+print('Diagnostic Accuracy: {}'.format((TP + TN)/(TP + FP + FN + TN))) 
+print('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+print(len(sasa))
+print(TP+FP+TN+FN)
+
diff --git a/msDMS_Saleem&Miller_2025/3b_final.py b/msDMS_Saleem&Miller_2025/3b_final.py
new file mode 100644
index 0000000..51e1989
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3b_final.py
@@ -0,0 +1,80 @@
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--real', required=True, help='path to profile')
+parser.add_argument('--random', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--random2', required=True, help='2nd random dist')
+parser.add_argument('--real2', required=True, help='2nd real dist')
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def parse_infile(input_f):
+    nts = {}
+    with open(input_f) as inp:
+        for i,line in enumerate(inp):
+            if i == 0:
+                continue
+            else:
+                spl = line.split()
+                if spl[2] == '0.0':
+                    continue
+                else:
+                    nts[int(spl[0])] = int(spl[1])
+    return nts
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within 20A')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+random_counts = parse_infile(args.random)
+counts = parse_infile(args.real)
+random_counts2 = parse_infile(args.random2)
+counts2 = parse_infile(args.real2)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/3d_calcs_final.py b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
new file mode 100644
index 0000000..944f202
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_prot_nts(fname):
+    sizes = []
+    with open(fname) as inp:
+        for line in inp:
+            sizes.append(int(line.strip()))
+    return sizes
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    return p_locus
+
+def sample_fold(size_l,loci_nts, rna):
+    rna_copy = copy.deepcopy(rna)
+    length = len(rna_copy)
+    percent_cover = []
+    encapsulated = 0
+    selected = {}
+    for i in size_l:
+        nts_in = []
+        selected[i] = set()
+        while len(selected[i]) == 0:
+            prospective = set()
+            start = random.choice(rna_copy)
+            end = start + i
+            if end > length:
+                continue
+            for j in range(start, end + 1):
+                prospective.add(j)
+
+
+            if prospective.issubset(set(rna_copy)):
+                selected[i] = prospective
+        for j in selected[i]:
+            rna_copy.remove(j)
+            if j in loci_nts:
+                nts_in.append(j)
+        cover = len(nts_in) / len(selected[i])
+        percent_cover.append(cover)
+        if cover == 1.0:
+            encapsulated += 1
+
+    return encapsulated
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0,1,2,3,4,5,6,7,8,9,10],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=6, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('# of SFD encapsulated')
+    ax.set_xticks(range(10))
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+if __name__ == '__main__':
+    #1542 16s
+    #2904 23s
+    prot_nts = read_prot_nts('23s_selffoldingnts.txt')
+    prim_nts = read_primary_nts('23s_primarynts.txt')
+    #prot_nts = read_prot_nts('16s_selffoldingnts.txt')
+    #prim_nts = read_primary_nts('16s_primarynts.txt')
+    ssu = []
+    lsu = []
+    
+    #for i in range(1, 1542 + 1):
+    #    ssu.append(i)
+    
+    for i in range(1, 2904 + 1):
+        ssu.append(i)
+
+    #lsu_en = []
+    ssu_en = []
+
+    while len(ssu_en) < 10000:
+        ssu_en.append(sample_fold(prot_nts, prim_nts, ssu))
+
+    hit = []
+    for i in ssu_en:
+        if i == 6:
+            hit.append(i)
+    print(len(hit)/ len(ssu_en))
+    histogram(ssu_en, title='23s', name='23s_10k_sfd_hist.pdf')
+    
diff --git a/msDMS_Saleem&Miller_2025/4c+e.py b/msDMS_Saleem&Miller_2025/4c+e.py
new file mode 100644
index 0000000..d625780
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/4c+e.py
@@ -0,0 +1,367 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+import matplotlib as mpl
+import matplotlib.pyplot as plot
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] ='Arial'
+
+import sys, os, math
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+parser.add_argument('--inputRings', required=True, help='path to ringfile')
+parser.add_argument('--adjustFrame', type=int, default=0, help='adjust frame of profile to match crystal structure if necessary')
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ringType', choices=('N1', 'N1N7', 'N7'), required=True, help='type of rings to plot')
+parser.add_argument('--crystalStart', type=int, required=True, help='first position of the RNA chain')
+parser.add_argument('--crystalEnd', type=int, required=True, help='last position of the RNA chain')
+parser.add_argument('--outputName', required=True, help='Output prefix for pymol session')
+parser.add_argument('--contactFilter', type=int, default=0, help='contact filter distance')
+parser.add_argument('--concatenated', action='store_true', help='plot all types of rings')
+parser.add_argument('--filterNegative', action='store_true', help='filter negative rings')
+parser.add_argument('--ctFile', required=True, help='path to ct file')
+parser.add_argument('--APCcutoff', type=int, default=2, help='filter rings with a score lower than or equal to this number')
+parser.add_argument('--approx', action='store_true', help='draw ring to closest nt if nt is missing from crystal structure')
+parser.add_argument('--renderall', action='store_true', help='render all the rings if a total concat file is passed')
+parser.add_argument('--distanceFilter', type=int, default=99999, help='check average contact distance of things within filter(angstroms)')
+args = parser.parse_args()
+
+
+
+'''
+tool for parsing ring data and drawing it onto a crystal structure
+
+takes a profile, ringfile
+takes a chain of interest for a crystal structure
+takes first and last residue numbers of that chain to get things in frame
+takes a frameshift integer
+'''
+
+cmd.load(args.pdbfile)
+
+profile = rprofile(args.profile)
+
+rings = []
+residues = []
+
+def adjust_frame(profile, ring_list, difference):
+    profile.nts = [x + difference for x in profile.nts]
+    for i in ring_list:
+        
+        i[0] += difference
+        i[1] += difference
+
+
+def parse_ringmap(cutoff, name='', filterneg=False):
+    rings = []
+    with open(name) as inp:
+        for p, line in enumerate(inp):
+            if p == 0 or p == 1:
+                continue
+            else:
+                split = line.split()
+                #split[2] for sig split[9] for alpha
+                if float(split[9]) <= cutoff:
+                    continue
+                    
+                if filterneg:
+                    if split[3] == '-1':
+                        
+                        continue
+                rings.append([int(split[0]), int(split[1]), float(split[9])])
+    return rings
+
+def contact_filter(inrings, filter_distance=0, ctfile=None):
+    
+    rings = []
+    CT1 = None
+    if ctfile is not None:
+        CT1 = RNAtools.CT(ctfile)
+    
+    for i in inrings:
+            #print(CT1.contactDistance(i[0], i[1]))
+            #print(str(i[0]) + ' ' + str(i[1]))
+            if CT1.contactDistance(i[0], i[1]) <= filter_distance:
+                #print('filtered')
+                continue
+            else:
+                
+                i.append(CT1.contactDistance(i[0],i[1]))
+                print("contact filter add {}".format(i))
+                rings.append(i)
+
+    return rings
+
+def split_concat(concat_list, profile):
+    last =  profile.nts[-1]
+    n1_rings = []
+    n1n7_rings = []
+    n7_rings = []
+    for i in concat_list:
+        if i[0] > last and i[1] > last:
+            i[0] -= last
+            i[1] -= last
+            n7_rings.append(i)
+        elif i[0] < last and i[1] < last:
+            n1_rings.append(i)
+        else:
+            n1n7_rings.append(i)
+            if i[0] > last:
+                i[0] -= last
+            else:
+                i[1] -= last
+    print(len(n1_rings), len(n1n7_rings), len(n7_rings))
+    return(n1_rings, n1n7_rings, n7_rings)
+
+def closest(lst, K):
+     
+    return lst[min(range(len(lst)), key = lambda i: abs(lst[i]-K))]
+
+
+def draw(ring_list, chain, crystalstart, crystalend, approx, ringtype, render=False, distancefilter=0):
+    distances = []
+    scores = []
+
+    
+    #get all the residues in a chain. some crystal structures are missing residues in the middle
+    
+    dchain = 'chain {}'.format(chain)
+    cmd.iterate(dchain,'residues.append(int(resi))')
+    fresidues = [*set(residues)]
+    #print('residues {}'.format(fresidues))
+    
+    
+    if ringtype == 'N1':
+        colors = [(44,123,182), (44,123,182), (171,217,233), (255,255,255), (253,174,97), (215,25,28), (215,25,28)]
+    else:
+        colors = [(91, 218, 190), (91, 218, 190), (0, 95, 154), (255, 255, 255), (218, 91, 172), (83, 48, 95), (83, 48, 95)]
+
+    #change to (20,100) for sig
+    cutoffs = [-1e10, -5, -2, 0, 2, 5, 1e10]
+    #alpha = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
+    gradiant = [False, True, False, False, True, False]
+
+    for v,i in enumerate(ring_list):
+        linestart = i[0]
+        startstr = 'start_{}'.format(v+1)
+        endstr = 'end_{}'.format(v+1)
+        lineend = i[1]
+
+        
+        if linestart not in fresidues:
+            if approx:
+                linestart = closest(fresidues, linestart)
+            else:
+                continue
+        if lineend not in fresidues:
+            if approx:
+                lineend = closest(fresidues, lineend)
+            else:
+                continue
+
+        if linestart  <= crystalstart or lineend <= crystalstart:
+            
+            continue
+        elif linestart >= crystalend or lineend >= crystalend:
+            
+            continue
+
+        else:
+            distance = cmd.get_distance(atom1='chain {} and resi {} and name P'.format(chain, linestart), atom2='chain {} and resi {} and name P'.format(chain, lineend))
+            
+            if distance < distancefilter:
+                try:
+                    print(i)
+                    scores.append(i[3] / distance)
+                except:
+                    print('FAIL at ring {}'.format(i))
+            
+
+            distances.append(distance)
+            
+            for ind in range(len(cutoffs) - 1):
+                if cutoffs[ind] < i[2] <= cutoffs[ind + 1]:
+                    if gradiant[ind]:
+                        calccolor = colorGrad(i[2], colors[ind], colors[ind + 1], cutoffs[ind], cutoffs[ind + 1])
+                    else:
+                        calccolor = colors[ind]
+
+            cmd.set_color("color {}".format(v+1),calccolor)
+
+            cmd.select(startstr, 'chain {} and resi {} and name P'.format(chain, linestart))
+            start = cmd.get_coords('chain {} and resi {} and name P'.format(chain, linestart), 1)
+            
+            cmd.pseudoatom('points', name='start_{}'.format(v+1), pos=tuple(start[0]), color="color {}".format(v+1))
+        
+            cmd.deselect()
+        
+            cmd.select(endstr, 'chain {} and resi {} and name P'.format(chain, lineend))
+            end = cmd.get_coords('end_{}'.format(v+1), 1)
+            cmd.pseudoatom('points', name='end_{}'.format(v+1), pos=tuple(end[0]), color="color {}".format(v+1))
+
+            cmd.deselect()
+            if render:
+                print('rendering rings')
+                cmd.bond('points and name start_{}'.format(v+1), 'points and name end_{}'.format(v+1))
+
+    print("scores {}".format(scores))
+    try:
+        print("average score {}".format(sum(scores)/len(scores)))
+    except:
+        print('no rings in category')
+    return distances
+
+
+def filter_unprot_pos(ring_list, gaprofile):
+    '''
+    should be run after adjust_frame if applicable
+    '''
+
+    normvalues = {}
+    for i, v in enumerate(gaprofile.nts):
+        if np.isfinite(gaprofile.normprofile[i]):
+            normvalues[v] =  gaprofile.normprofile[i]
+
+    for i in ring_list:
+        if i[0] in normvalues and normvalues[i[0]] <= 2:
+            ring_list.remove(i)
+        if i[1] in normvalues and normvalues[i[1]] <= 2:
+            ring_list.remove(i)
+
+    return ring_list
+
+
+def filter_sig(ring_list, cutoff):
+    for i in ring_list:
+        if float(i[3]) < cutoff:
+            ring_list.remove(i)
+    return ring_list
+
+def filter_unconcat(concat_list, unconcat_list):
+    print('concat length {}'.format(len(concat_list)))
+    num_matches = 0
+    
+    non_matches = []
+    original = concat_list.copy()
+    for i in unconcat_list:
+        if i in original:
+            num_matches += 1
+            concat_list.remove(i)
+            
+        elif i not in original:
+            non_matches.append(i)
+    
+
+    for i in non_matches:
+        i[0] += 1
+    for i in non_matches:
+        if i in concat_list:
+            num_matches += 1
+    print(concat_list, '\n')
+    print(non_matches)
+    print('{} matches'.format(num_matches))
+
+    print(len(unconcat_list))
+    print("{} filtered length".format(len(concat_list)))
+    return concat_list
+
+
+
+def dist_histogram(dist_list, bins=10, title='',name='dist_hist.pdf'):
+    fig, ax = plot.subplots()
+    label = ''
+    colors = ['blue', 'orange', 'green']
+    for i, v in enumerate(dist_list):
+        if len(v) <= 0:
+            continue
+        if i == 0:
+            label = "N7"
+        elif i == 1:
+            label = 'N1'
+        elif i == 2:
+            label = "N7"
+        
+        ax.hist(v, histtype = 'step', bins = bins, label=label)
+        ax.axvline(x=np.percentile(v, [50]), color=colors[i],label=label + " median " + str(np.percentile(v, [50])))
+        ax.axvline(x=np.mean(v), color=colors[i],label=label + " mean " + str(np.mean(v)))
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('P-P Distance A')
+    ax.set_xlim(0, 70)
+    
+    #ax.set_ylim(20,150)
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def colorGrad(value, colorMin, colorMax, minValue, maxValue, log=False):
+        """ return an interpolated rgb color value """
+        
+        if log:
+            value = 10**-value
+            minValue = 10**-minValue
+            maxValue = 10**-maxValue
+
+        if value > maxValue:
+            return colorMax
+        elif value < minValue:
+            return colorMin
+        else:
+            v = value - min(maxValue, minValue)
+            v /= abs(maxValue-minValue)
+        
+            col = []
+            for i in range(3):
+                col.append( v*(colorMax[i] - colorMin[i]) + colorMin[i] )
+            
+            return col
+    
+
+rings = parse_ringmap(args.APCcutoff, args.inputRings, args.filterNegative)
+N1_rings, N1N7_rings, N7_rings = split_concat(rings, profile)
+
+if args.concatenated:
+    args.ringType= 'N7'
+    N7_rings.extend(N1N7_rings)
+    #N1_rings.extend(N7_rings)
+    pass
+
+rings_toplot = []
+if args.ringType == 'N7':
+    rings_toplot = N7_rings.copy()
+elif args.ringType == 'N1N7':
+    rings_toplot = N1N7_rings.copy()
+else:
+    rings_toplot = N1_rings.copy()
+
+print('precf {}'.format(rings_toplot))
+if args.contactFilter > 0:
+    rings_toplot = contact_filter(rings_toplot, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1_rings =  contact_filter(N1_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1N7_rings = contact_filter(N1N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N7_rings =  contact_filter(N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+
+
+
+
+adjust_frame(profile, rings_toplot, args.adjustFrame)
+dist_data = []
+dist_data1 = draw(rings_toplot, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=True)
+dist_data2 = draw(N1_rings, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=False)
+dist_data.append(dist_data1)
+dist_data.append(dist_data2)
+
+dist_histogram(dist_data, bins=np.linspace(0,50,10), title='{} {}cf filterneg={} rings'.format(args.inputRings, args.contactFilter, str(args.filterNegative)), name='{} {}cf filterneg={} rings.pdf'.format(args.inputRings, args.contactFilter, str(args.filterNegative)))
+plot.close('all')
+cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/Figure_1B_final.py b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
new file mode 100644
index 0000000..188bfef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
@@ -0,0 +1,42 @@
+import argparse
+from ReactivityProfile import ReactivityProfile as rprofile
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--ssu_old')
+argparser.add_argument('--ssu_new')
+argparser.add_argument('--lsu_old')
+argparser.add_argument('--lsu_new')
+argparse.add_argument('--out')
+args = argparser.parse_args()
+small_old = rprofile(args.ssu_old)
+large_old = rprofile(args.lsu_old)
+small_new = rprofile(args.ssu_new)
+large_new = rprofile(args.lsu_new)
+
+
+old = [small_old.rawprofile[52], large_old.rawprofile[72]]
+new = [small_new.rawprofile[52],large_new.rawprofile[72]]
+
+
+xaxis = ['16s', '23s']
+barcount = 2
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,new, width, label='new')
+ax.bar(ind+width , old, width, label='old')
+ax.set_xticks(ind + width/2, ('16s', '23s'))
+
+
+ax.set_title('m7g reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SI1_C_final.py b/msDMS_Saleem&Miller_2025/SI1_C_final.py
new file mode 100644
index 0000000..ec7bf1f
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SI1_C_final.py
@@ -0,0 +1,46 @@
+import sys
+from ReactivityProfile import ReactivityProfile
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+mpl.use('Agg')
+mpl.rcParams['font.family'] = 'sans-serif'
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prof1 = ReactivityProfile(args.prof1)
+prof2 = ReactivityProfile(args.prof2)
+
+fig, ax = plt.subplots()
+
+ax.step(prof1.nts, (prof1.rawprofile * 100), label=args.prof1, where='mid', linewidth=2, linestyle='-')
+ax.step(prof1.nts, (prof2.rawprofile * 100), label=args.prof2, where='mid', linewidth=2)
+
+#52 for ssu 72 for lsu
+seq =  [str(i) for i in prof1.sequence]
+#seq[72] = 'm7G'
+seq[52] = 'm7G'
+
+ax.set_xlabel('Nucleotide', fontsize=20)
+ax.set_xticks(prof1.nts)
+ax.set_xticklabels(seq)
+
+#ax.set_xlim([65,82])
+ax.set_xlim([44,64])
+
+
+ax.set_ylabel('Mutation rate(%)', fontsize=20)
+ax.set_ylim([0, 25])
+
+ax.legend()
+fig.tight_layout()
+
+fig.savefig(args.out)
+
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
new file mode 100644
index 0000000..3186c5a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
@@ -0,0 +1,108 @@
+from ReactivityProfile import ReactivityProfile as rp
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+from matplotlib.ticker import AutoMinorLocator
+
+def get_data(listofprofs, subunit):
+    m7gs_mn = []
+    bgs_mn = []
+    if subunit == 'large':
+        x = 72
+    else:
+        print('smallsub')
+        x = 52
+    for i in listofprofs:
+        print(i.rawprofile)
+        m7gs_mn.append(i.rawprofile[x])
+        allreactivities = 0
+        for j in i.rawprofile:
+            if np.isfinite(j):
+                allreactivities += j
+        allreactivities -= i.rawprofile[x]
+        bg = allreactivities / (len(i.rawprofile) - 1)
+        bgs_mn.append(bg)
+    return m7gs_mn, bgs_mn
+
+
+Original_16s = rp('ph83_16s_16sm7_profile.txt')
+half_mn_16s = rp('05mM_16sm7_profile.txt')
+two_mn_16s =rp('2mM_16sm7_profile.txt')
+two5_mn_16s = rp('25ph83vs2ph83_16sm7_profile.txt')
+three_mn_16s = rp('3mM_16sm7_profile.txt')
+
+
+ph75_16s = rp('ph75_16sm7_profile.txt')
+ph8_16s = rp('ph8_16sm7_profile.txt')
+ph85_16s = rp('ph85_16sm7_profile.txt')
+finalcondition_16s = rp('2mmph9_16sm7_profile.txt')
+
+Original_23s = rp('ph83_23sm7_profile.txt')
+half_mn_23s = rp('05mM_23sm7_profile.txt')
+two_mn_23s = rp('2mM_23sm7_profile.txt')
+two5_mn_23s = rp('25mM_23sm7_profile.txt')
+three_mn_23s = rp('3mM_23sm7_profile.txt')
+
+ph75_23s = rp('ph75_23sm7_profile.txt')
+ph8_23s = rp('ph8_23sm7_profile.txt')
+ph85_23s = rp('ph85_16sm7_profile.txt')
+finalcondition_23s = rp('2mmph9_23sm7_profile.txt')
+
+ssu_manganese_m7gs, ssu_manganese_bgs = get_data([half_mn_16s, Original_16s, two_mn_16s, two5_mn_16s, three_mn_16s], '')
+ssu_ph_m7gs, ssu_ph_bgs = get_data([ph75_16s, ph8_16s, Original_16s, ph85_16s], '')
+ssu_final_m7g, ssu_final_bg = get_data([finalcondition_16s], '')
+
+lsu_manganese_m7gs, lsu_manganese_bgs = get_data([half_mn_23s, Original_23s, two_mn_23s, two5_mn_23s, three_mn_23s], 'large')
+lsu_ph_m7gs, lsu_ph_bgs = get_data([ph75_23s, ph8_23s, Original_23s, ph85_23s], 'large')
+lsu_final_m7g, lsu_final_bg = get_data([finalcondition_23s], 'large')
+
+
+manganese_m7gs = [np.mean(i) for i in zip(ssu_manganese_m7gs,lsu_manganese_m7gs)]
+ph_m7gs = [np.mean(i) for i in zip(ssu_ph_m7gs,lsu_ph_m7gs)]
+final_m7g = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_m7g)]
+
+
+manganese_bgs = [np.mean(i) for i in zip(ssu_manganese_bgs,lsu_manganese_bgs)]
+ph_bgs = [np.mean(i) for i in zip(ssu_ph_bgs,lsu_ph_bgs)]
+final_bg = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_bg)]
+
+
+#add in empty data so final condition can be included in plot
+ph_m7gs.append(0.0)
+ph_bgs.append(0.0)
+
+labels_1 = ['0.5', '1', '2', '2.5', '3']
+labels_2 = ['7.5', '8.0', '8.3', '8.5', '9']
+labels_3 = ['2mM Mn pH 8.3', '1mM Mn pH 9', '2mM Mn pH 9']
+fig,ax = plot.subplots(1, 4, figsize=(10,2))
+
+#ax[2] = ax[0].twinx()
+#share yaxis
+ax[0].set_title('Manganese Concentration')
+ax[0].scatter(labels_1, manganese_m7gs)
+ax[0].plot(labels_1, manganese_m7gs)
+#add bg data
+ax[0].scatter(labels_1, manganese_bgs)
+ax[0].scatter(labels_1, [0.0,0.0,final_m7g[0],0.0,0.0])
+ax[0].plot(labels_1, manganese_bgs)
+ax[0].set_yscale('log', base=10)
+
+
+ax[1].set_title('Acidity')
+ax[1].scatter(labels_2, ph_m7gs)
+ax[1].plot(labels_2, ph_m7gs)
+#add bg data
+ax[1].scatter(labels_2, ph_bgs)
+ax[1].plot(labels_2, ph_bgs)
+ax[1].scatter(labels_2, [0.0, 0.0 ,0.0 ,0.0, final_m7g[0]])
+ax[1].set_yscale('log', base=10)
+#ax[3] = ax[1].twinx()
+
+
+
+
+fig.tight_layout()
+plot.savefig('SIFigure_1A_averages.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
new file mode 100644
index 0000000..461632e
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plt
+
+#df = pd.read_csv('2mmph9_Modified_16sm7_mutation_counts.txt',sep='\t',header=(0))
+df = pd.read_csv('ph9_Modified_23sm7_mutation_counts.txt',sep='\t',header=(0))
+
+#23s
+m7g = df.iloc[72]
+#16s
+#m7g = df.iloc[52]
+
+
+labels = 'GA', 'Other'
+data = [m7g['GA'], m7g['GT'] + m7g['GC'] + m7g['complex_insertion'] + m7g['complex_deletion'] + m7g['G_multinuc_mismatch']]
+
+fig, ax = plt.subplots()
+ax.pie(data, labels=labels, autopct='%1.1f%%')
+
+print(m7g['GA'])
+
+fig.tight_layout()
+plt.savefig('SIFigure_1C_23sfinalRT.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/figure1c_final.py b/msDMS_Saleem&Miller_2025/figure1c_final.py
new file mode 100644
index 0000000..c18bd38
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/figure1c_final.py
@@ -0,0 +1,35 @@
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sat')
+argparser.add_argument('--unsat')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+satTPP = rprofile(args.sat)
+unsatTPP = rprofile(args.unsat)
+
+fig, ax = plot.subplots()
+
+
+#manual bg sub
+unsat_man_bg = np.nan_to_num(unsatTPP.rawprofile[18:120]) - np.nan_to_num(unsatTPP.backprofile[18:120])
+sat_man_bg = np.nan_to_num(satTPP.rawprofile[18:120]) - np.nan_to_num(satTPP.backprofile[18:120])
+
+
+ax.step(satTPP.nts[18:120], unsat_man_bg, label='Unsaturated', where='mid')
+ax.step(satTPP.nts[18:120], sat_man_bg, label='Saturated', where='mid')
+
+
+
+ax.set_title('N7 reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/pymol_rmsd.py b/msDMS_Saleem&Miller_2025/pymol_rmsd.py
new file mode 100644
index 0000000..67d8e9d
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/pymol_rmsd.py
@@ -0,0 +1,52 @@
+from pymol import cmd
+import argparse
+from glob import glob as gb
+
+def get_models(prefix, dirpath):
+    models = []
+    for model in gb(dirpath + prefix + '*'):
+        if model.split('.')[-1] != 'cif':
+            continue
+        models.append(model)
+    #print(models)
+    return models
+
+def get_rmsd(reference_obj, predicted):
+    pred_obj = predicted.split('\\')[-1].split('.')[0]
+
+    print(reference_obj, pred_obj)
+    cmd.load(predicted, pred_obj)
+    #cmd.select(reference_obj)
+    #cmd.select(pred_obj)
+    out = cmd.align(reference_obj, pred_obj, cycles=0)
+    cmd.delete(pred_obj)
+    print(out[0], out[3])
+    return out[3]
+
+def write_rmsds(rmsd_dict, outname):
+    out = open(outname, 'w')
+    for model in rmsd_dict.keys():
+        out.write(model.split('\\')[-1].split('.')[0] + '\t' + str(rmsd_dict[model]) + '\n')
+
+def _rmsds_main(ref_struct, pred_prefix, output, directory):
+    model_list = get_models(pred_prefix, directory)
+    ref_obj = ref_struct.split('/')[-1].split('.')[0]
+    cmd.load(ref_struct, ref_obj)
+    rmsds = {}
+    for model in model_list:
+        rmsds[model] = get_rmsd(ref_obj, model)
+    write_rmsds(rmsds, output)
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(description='Compare many structure predictions to a single pdb structure. Output is RMSD in angstroms for all atoms')
+    argparser.add_argument('--pred_dir', default='./', help='directory to search for predicted structures')
+    argparser.add_argument('--pred_prefix', required=True, help='prefix for all predicted structures to compare')
+    argparser.add_argument('--ref_struct', required=True, help='reference structure to compare against')
+    argparser.add_argument('--output', required=True, help='output filename including file ext e.g. (output.txt)')
+    args = argparser.parse_args()
+
+    _rmsds_main(args.ref_struct, args.pred_prefix, args.output, args.pred_dir)
+    
+
+
+
diff --git a/msDMS_Saleem&Miller_2025/pymol_sasa.py b/msDMS_Saleem&Miller_2025/pymol_sasa.py
new file mode 100644
index 0000000..248c3f3
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/pymol_sasa.py
@@ -0,0 +1,38 @@
+
+#__main__.pymol_argv = ['pymol','-qc']
+
+import sys
+import argparse
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--pdbfile')
+parser.add_argument('--chain')
+parser.add_argument('--output')
+
+args = parser.parse_args()
+
+radius = 4
+
+cmd.set('dot_solvent', 1)
+cmd.set('dot_density', 3)
+cmd.set('solvent_radius', radius)
+
+cmd.load(args.pdbfile)  
+
+
+
+rnumbers = []
+cmd.iterate('chain {} and resn G and name N7'.format(args.chain), 'rnumbers.append(resi)')
+
+out = open(args.output,'w')
+for i in rnumbers:
+    
+    if cmd.count_atoms('chain {} and resi {} and name N7'.format(args.chain, i)) == 1:
+        
+        v = cmd.get_area('chain {} and resi {} and name N7'.format(args.chain, i))
+        out.write('{} {}\n'.format(i,v))
+
+out.close()
+
+
diff --git a/msDMS_Saleem&Miller_2025/random_ringsample.py b/msDMS_Saleem&Miller_2025/random_ringsample.py
new file mode 100644
index 0000000..c97733a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/random_ringsample.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_ring(input_rings):
+    length = 0
+    n1rings = []
+    n7rings = []
+    with open(input_rings) as ringin:
+        
+        for i,line in enumerate(ringin):
+            if i > 1:
+                spl = line.split()
+                n1 = int(spl[0])
+                n2 = int(spl[1])
+                a = float(spl[9])
+                #print(a)
+                if a < 2:
+                    continue
+                else:
+                    if n1 <= length and n2 <= length: #filter n1 rings
+                        n1rings.append((n1,n2))
+                    else:
+                        n7rings.append((n1,n2))
+            else:
+                if i == 0:
+                    length = int(line.split()[0])
+
+    #print(n1rings)
+    return length,n1rings, n7rings
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    print(p_locus)
+    return p_locus
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=.84, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('% rings between tertiary sites')
+
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def sample_fold(rings,loci_nts, rna):
+    encapsulated = 0
+    for i in rings:
+        rna_copy = copy.deepcopy(rna)
+        nt1 = random.choice(rna_copy)
+        rna_copy.remove(nt1)
+        if nt1 - 1 in rna_copy:
+            rna_copy.remove(nt1 - 1)
+        if nt1 + 1 in rna_copy:
+            rna_copy.remove(nt1 + 1)
+        nt2 = random.choice(rna_copy)
+        #print(nt1, nt2)
+        if nt1 in loci_nts and nt2 in loci_nts:
+            encapsulated += 1
+    return encapsulated / len(rings)
+
+if __name__ == '__main__':
+    # tpp_tertnts.txt p546_tertnts.txt
+    loci = read_primary_nts('p546_tertnts.txt')
+
+    # p546_r2r3_intersect_shared_corrbuffer.txt satTPP_intersect_shared_corrbuffer.txt
+    rna_len,n1,n7 = read_ring('p546_r2r3_intersect_shared_corrbuffer.txt')
+
+    rna_nts = []
+    for i in range(1, rna_len + 1):
+        rna_nts.append(i)
+
+    samples = []
+    while len(samples) < 10000:
+        samples.append(sample_fold(n1, loci, rna_nts))
+    print(samples)
+
+    
+    hit = []
+    for i in samples:
+        if i >= 0.83: # 
+            hit.append(i)
+    print(len(hit)/ len(samples))
+    histogram(samples, title='P546', name='P546_10k_ringsample_hist.pdf')
+   
diff --git a/msDMS_Saleem&Miller_2025/si2_d+e_final.py b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
new file mode 100644
index 0000000..df692f5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
@@ -0,0 +1,67 @@
+from json.tool import main
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plot
+import sys
+import RNAtools2 as RNAtools
+import numpy as np
+from sklearn.metrics import roc_curve, roc_auc_score
+from ReactivityProfile import ReactivityProfile
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+import argparse
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+profobj = ReactivityProfile(args.prof1)
+prof2obj = ReactivityProfile(args.prof2)
+ctobj = RNAtools.CT(args.ctfile)
+pairedlist = ctobj.pairedResidueList(False)
+
+def getNtLists(profile, pairedlist, ntype):
+    
+        react = []
+        ispaired = []
+        for i,v in enumerate(profile.subprofile):               
+            if v > -10 and profile.sequence[i] == ntype:
+                react.append(v)
+                ispaired.append( int((i+1) in pairedlist) )
+            
+        print(react)
+        return react, ispaired
+
+
+labels = ('A', 'C', 'G', 'U')
+scores = []
+scores2 = []
+
+fig,ax = plot.subplots(1, 4, figsize=(8,2))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(profobj, pairedlist, nt)
+    scores.append(roc_auc_score(p,r))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(prof2obj, pairedlist, nt)
+    scores2.append(roc_auc_score(p,r))
+
+
+fig,ax = plot.subplots()
+
+barcount = 4
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,scores, width, label='new')
+ax.bar(ind+width , scores2, width, label='old')
+ax.set_xticks(ind + width/2, labels)
+
+
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_g_final.py b/msDMS_Saleem&Miller_2025/si2_g_final.py
new file mode 100644
index 0000000..719f634
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_g_final.py
@@ -0,0 +1,60 @@
+import re
+import sys
+import argparse
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as pyplot
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7profile')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+n7 = rprofile(args.n7profile)
+
+bgprof = n7.backprofile
+rawprof = n7.rawprofile
+bgsub = n7.subprofile
+normprof = n7.normprofile
+
+react, seq = [],[]
+x=0
+
+
+while x < (len(n7.sequence)):
+    seq.append(n7.sequence[x])
+    react.append(n7.subprofile[x])
+    x += 1
+
+
+ct = RNAtools.CT(args.ctfile)
+pairs = ct.pairedResidueList()
+
+s,r = [],[]
+
+# go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
+for i,v in enumerate(seq):
+    if v == 'G' and np.isfinite(react[i]):
+        r.append(react[i])
+        if i in pairs:
+            s.append(True)
+        else:
+            s.append(False)
+
+
+# compute ROC curve and plot, along with AUC
+tmp = roc_curve(s, r, drop_intermediate=False)
+pyplot.plot(tmp[0], tmp[1], label='Basepairing: {:.2f}'.format(roc_auc_score(s,r)))
+    
+
+pyplot.plot([0, 1], [0, 1], linestyle="--")
+pyplot.xlim([0.0, 1.0])
+pyplot.ylim([0.0, 1.05])
+pyplot.title('')
+pyplot.legend()
+
+pyplot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_h+i_final.py b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
new file mode 100644
index 0000000..450bf1c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
@@ -0,0 +1,238 @@
+import pandas as pd
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import scipy.stats
+import math
+import itertools
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1', required=True)
+argparser.add_argument('--prof2')
+argparser.add_argument('--out', required=True)
+args = argparser.parse_args()
+
+
+# read in the N7 reactivity file
+n7 = rprofile(args.prof1)
+n72 = rprofile(args.prof2)
+
+
+
+def reactivityByneighbors(self, add=None, GA=False, Triplet=False, pos=1, byNT=False, name=''):
+    '''
+    Takes a reactivity profile as an argument.
+    Plots reactivity rates based on neighbors. 
+    Pass a negative number to look upstream, positive for downstream. 
+    Will group purines and pyrimidines by default.
+    '''
+
+    def adjacent_values(vals, q1, q3):
+        upper_adjacent_value = q3 + (q3 - q1) * 1.5
+        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
+
+        lower_adjacent_value = q1 - (q3 - q1) * 1.5
+        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
+        return lower_adjacent_value, upper_adjacent_value
+
+
+    def set_axis_style(ax, labels):
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_xlim(0.25, len(labels) + 0.75)
+        ax.set_xlabel('Sample name')
+
+    if add is not None:
+        self.subprofile = np.concatenate([self.subprofile, add.subprofile])
+        self.sequence = np.concatenate([self.sequence, add.sequence])
+
+    def stars(p):
+    ##get stars for annotating plots using a p value
+        if p < 0.000001:
+            return '******'
+        elif p < 0.00001:
+            return '*****'
+        elif p < 0.0001:
+            return "****"
+        elif (p < 0.001):
+            return "***"
+        elif (p < 0.01):
+            return "**"
+        elif (p < 0.05):
+            return "*"
+        else:
+            return "-"
+
+    if Triplet:
+            nts = ['A','G','U','C']
+            _triplets = {}
+            for i in nts:
+                for j in nts:
+                    for k in nts:
+                        _triplets[i + j + k] = []
+                
+            for n in nts:
+                for i, v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        pre = self.sequence[i - 1]
+                        nxt = self.sequence[i + 1]
+                        _triplets[pre + n + nxt].append(v)
+
+
+    elif not GA:
+            '''
+            Data is ordered (A) purines, pyrimidines then (G) purines, pyrimidines, etc.
+            
+            '''
+
+
+            nts = ['A','G','U','C']
+            data = []
+            labels = []
+            for n in nts:
+                A = []
+                G = []
+                T = []
+                C = []
+                purines = []
+                pyrimidine = []
+                for i,v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        if byNT:
+                            if self.sequence[i + pos] == 'A':
+                                A.append(v)
+                            elif self.sequence[i + pos] == 'G':
+                                G.append(v)
+                            elif self.sequence[i + pos] == 'U':
+                                T.append(v)
+                            else:
+                                C.append(v)
+                        else:
+                            if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                                purines.append(v)
+                            else:
+                                pyrimidine.append(v)
+                if byNT:
+                    data.append(A.copy())
+                    data.append(G.copy())
+                    data.append(T.copy())
+                    data.append(C.copy())
+                    labels.append(str(n) + ' A')
+                    labels.append(str(n) + ' G')
+                    labels.append(str(n) + ' T')
+                    labels.append(str(n) + ' C')
+                else:
+                    data.append(purines.copy())
+                    data.append(pyrimidine.copy())
+                    labels.append(str(n) + ' Purines')
+                    labels.append(str(n) + ' Pyrimadines')
+
+    else:
+        if byNT:
+            A = []
+            G = []
+            T = []
+            C = []
+
+            for i, v in enumerate(self.subprofile):
+                if np.isfinite(v):
+                    if self.sequence[i + pos] == 'A':
+                        A.append(v)
+                    elif self.sequence[i + pos] == 'G':
+                        G.append(v)
+                    elif self.sequence[i + pos] == 'U':
+                        T.append(v)
+                    else:
+                        C.append(v)
+            labels = ['A','G','U','C']
+
+            data = [A,G,T,C]
+
+
+        else:
+                purines = []
+                pyrimidine = []
+                print('GA')
+                for i, v in enumerate(self.normprofile):
+                    if np.isfinite(v):
+                        if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                            purines.append(v)
+                        else:
+                            pyrimidine.append(v)
+
+                labels = ['purines', 'pyrimidines']
+                data = [purines, pyrimidine]
+    if not Triplet:
+        if GA:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(6,8))
+        elif not GA and not byNT:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(8,16))
+        else:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(30,30))
+        quants = []
+        for i in data:
+            quants.append([.25,.75])
+
+
+        ax[0].hist(data[0], histtype = 'step', label='Purines')
+        ax[0].hist(data[1], histtype = 'step', label='Pyrimidines')
+        ax[0].legend()
+
+        for i in data:
+            quantiles = np.quantile(i, np.array([ 0.75]))
+            print(quantiles)
+
+        tabledata = [['Comparison', 'P value']]
+        for i,j in itertools.combinations(data, 2):
+
+                u_stat, p = scipy.stats.mannwhitneyu(i, j, alternative='two-sided')
+
+                p_value = p
+                print(p_value)
+                print('{}   {}'.format(data.index(i), data.index(j)))
+
+                #get max and min for putting the annotation in the right place
+                y_max = np.max(np.concatenate((i, j)))
+                y_min = np.min(np.concatenate((i, j)))
+
+                tabledata.append([labels[data.index(i)] + ' vs. ' + labels[data.index(j)], str(p_value)])
+
+            
+        ax[1].table(cellText=tabledata, loc='center' )
+
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+    else:
+        out = open('{}.txt'.format(name), 'w')
+        out.write('triplet\tmean_rep1\tmedian_rep1\ttriplet_count_rep1\n')
+        quants = []
+        labels = []
+        data = []
+        for i in _triplets:
+            print(i)
+            try:
+                quantiles = np.quantile(_triplets[i], np.array([0.5]))
+                mean = np.mean(_triplets[i])
+                out.write('{}\t{:.6}\t{:.6}\t{}\n'.format(i, mean, quantiles[0], len(_triplets[i])))
+                labels.append(i)
+                data.append(_triplets[i])
+                quants.append([.25,.75])
+            except:
+                continue
+
+        fig, ax = plot.subplots(nrows = 1, ncols = 1, figsize=(30,30))
+
+        ax.violinplot(data, showmedians=True, quantiles=quants)
+
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_ylim(0,0.25)
+        ax.set_ylabel('BG sub reactivity')
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+
+
+reactivityByneighbors(n7, add=n72, GA=True, byNT=False, Triplet=False, pos=1, name=args.out)
diff --git a/msDMS_Saleem&Miller_2025/si2_k_final.py b/msDMS_Saleem&Miller_2025/si2_k_final.py
new file mode 100644
index 0000000..d5be507
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_k_final.py
@@ -0,0 +1,67 @@
+
+import re
+import sys
+import argparse
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as pyplot
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof')
+argparser.add_argument('--sasa')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+# read in the N7 reactivity file
+n7 = rprofile(args.prof)
+
+bgprof = n7.backprofile
+rawprof = n7.rawprofile
+bgsub = n7.subprofile
+normprof = n7.normprofile
+
+react, seq = [],[]
+x=0
+
+
+while x < (len(n7.sequence)):
+    seq.append(n7.sequence[x])
+    react.append(n7.normprofile[x])
+    x += 1
+
+
+# read in the sasa file output from pymol script
+sasa = {}
+with open(args.sasa) as inp:
+    for line in inp:
+        spl = line.split()
+        sasa[int(spl[0])-1] = float(spl[1])
+        
+print(sasa)
+# plot the data at several different cutoffs
+for cut in (1,2):
+    s,r = [],[]
+
+    # go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
+    for i,v in enumerate(seq):
+        if v == 'G' and np.isfinite(react[i]) and i in sasa:
+            r.append(react[i])
+            s.append(int(sasa[i] < cut))
+    
+
+
+    # compute ROC curve and plot, along with AUC
+    tmp = roc_curve(s, r, drop_intermediate=False)
+    pyplot.plot(tmp[0], tmp[1], label='{}: {:.2f}'.format(cut, roc_auc_score(s,r)))
+    
+
+pyplot.plot([0, 1], [0, 1], linestyle="--")
+pyplot.xlim([0.0, 1.0])
+pyplot.ylim([0.0, 1.05])
+pyplot.title('')
+pyplot.legend()
+
+pyplot.savefig(args.out)
+
+
diff --git a/msDMS_Saleem&Miller_2025/si3_ac_calcs.py b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
new file mode 100644
index 0000000..c7c6603
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
@@ -0,0 +1,96 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import glob
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sasa')
+argparser.add_argument('--subunit', choices=['16', '23'])
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prot_cut = 1.6
+
+sasa_cut = 1
+
+infile = args.sasa
+
+
+out = open(args.out,'w')
+
+adjustframe = 0
+def adjust_frame(profile, difference):
+    profile.nts = [x + difference for x in profile.nts]
+
+#match end half of profile names
+false_p = []
+false_out = open('{}_false_negatives.txt'.format(sasa.split('.')[0]), 'w')
+for file in glob.glob("*ec{}S_profile.txtga".format(args.subunit)):
+    print(file)
+    n7 = rprofile(file)
+    adjust_frame(n7,adjustframe)
+
+    # read in the sasa file output from pymol script
+    sasa = {}
+    with open(infile) as inp:
+        for line in inp:
+            spl = line.split()
+            sasa[int(spl[0])] = float(spl[1])
+
+    x = 0
+    protected = {}
+    cfprotected = {}
+    while x < len(n7.normprofile):
+        ##loop through the normprofile and take any normalized values above what we consider "protected"
+        if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+            protected[n7.nts[x]] = n7.normprofile[x]
+
+        x += 1
+
+
+    positives = {}
+    negatives = {}
+    for i in sasa:
+            ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+            if sasa[i] >= sasa_cut:
+                negatives[i] = sasa[i]
+            ##if measured area is lower than our cutoff it is protected and therefore "positive"
+            if sasa[i] < sasa_cut:
+                positives[i] =  sasa[i]
+
+    TP = 0
+    FP = 0
+    TN = 0
+    FN = 0
+    for i in sasa:
+        ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+        if i in protected and i in positives:
+            TP += 1
+        if i in protected and i in negatives:
+            FP += 1
+            
+        if i in positives and i not in protected:
+            FN += 1
+            false_p.append(str(i))
+        if i in negatives and i not in protected:
+            TN += 1
+
+    out.write(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+    out.write('Sensitivity: {}\n'.format(TP/(TP+FN)))
+    out.write('Specificity: {}\n'.format(TN/(TN+FP)))
+    out.write('PPV: {}\n'.format(TP/(TP+FP)))
+    out.write('NPV: {}\n'.format(TN/(TN+FN)))
+    out.write('Diagnostic Accuracy: {}\n'.format((TP + TN)/(TP + FP + FN + TN))) 
+    out.write('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+    out.write(str(len(sasa)))
+    out.write('\n')
+    out.write(str(TP+FP+TN+FN))
+    out.write('\n')
+
+print(len(set(false_p)))
+for nt in set(false_p):
+    false_out.write(nt+' ')
+
+out.close()
+
diff --git a/msDMS_Saleem&Miller_2025/si3_f_final.py b/msDMS_Saleem&Miller_2025/si3_f_final.py
new file mode 100644
index 0000000..1b1232c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_f_final.py
@@ -0,0 +1,69 @@
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+from pymol import cmd
+
+def get_nts_profile(profile):
+    nt_list = []
+    prof = rprofile(profile)
+    for i,v in enumerate(prof.normprofile):
+        if np.isfinite(v) and v >= 1.6:
+            nt_list.append(i + 1)
+    return nt_list
+
+def adjust_frame(number, listnts):
+    adjustednts = []
+    for i in listnts:
+        adjustednts.append(i+number)
+    return adjustednts
+
+
+
+def get_nts(input_file):
+    nts = []
+    with open(input_file, 'r') as inp:
+        for i,line in enumerate(inp):
+            if i == 0 or i == 1:
+                continue
+            spl= line.split()
+            nts.append(int(spl[0]))
+    return nts
+
+def get_nt_list(inputfile):
+    with open(inputfile, 'r') as inp:
+        for line in inp:
+            nts = line.split()
+    return nts
+
+
+def color_nts(chain, nt_list):
+    cmd.set_color('protected', (83, 48, 95))
+    for G in nt_list:
+        try:
+            cmd.select('protected_nts', 'chain {} and resi {}'.format(chain, G), merge=1)
+            cmd.color('protected', 'chain {} and resi {}'.format(chain, G))
+        except:
+            print('Nt {} not in structure'.format(G))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+    parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+    parser.add_argument('--input', required=True, help='path to infile')
+    parser.add_argument('--outputName', required=True)
+    parser.add_argument('--adjustframe', type=int)
+    parser.add_argument('--profile', action="store_true")
+    args = parser.parse_args()
+    cmd.load(args.pdbfile)
+    #to_color = get_nts(args.input)
+    if not args.profile:
+        to_color = get_nt_list(args.input)
+    else:
+        to_color = get_nts_profile(args.input)
+    if args.adjustframe is not None:
+        to_color = adjust_frame(args.adjustframe,to_color)
+
+    color_nts(args.chain, to_color)
+
+    cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si6_a_final.py b/msDMS_Saleem&Miller_2025/si6_a_final.py
new file mode 100644
index 0000000..b23d8b5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si6_a_final.py
@@ -0,0 +1,99 @@
+
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ctfile', required=True)
+parser.add_argument('--profile2', required=True, help='path to profile')
+parser.add_argument('--ctfile2', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--cutoff', type=int, default=3)
+parser.add_argument('--filterdistance', type=int, default=10)
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                #print('filtered')
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True, quantiles=quants)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within CF 5')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+prof = rprofile(args.profile)
+CT = RNAtools.CT(args.ctfile)
+gs = []
+prot = []
+random_prot = []
+for i,v in enumerate(prof.normprofile):
+    if np.isfinite(v):
+        gs.append(i+1)
+    if v >= args.cutoff:
+        prot.append(i +1)
+
+random_prot = random.sample(gs, k=len(prot))
+print(prot)
+print(random_prot)
+random_counts = contact_filter(random_prot, CT, args.filterdistance)
+counts = contact_filter(prot, CT, args.filterdistance)
+
+prof2 = rprofile(args.profile2)
+CT2 = RNAtools.CT(args.ctfile2)
+gs2 = []
+prot2 = []
+random_prot2 = []
+for i,v in enumerate(prof2.normprofile):
+    if np.isfinite(v):
+        gs2.append(i+1)
+    if v >= args.cutoff:
+        prot2.append(i +1)
+
+random_prot2 = random.sample(gs2, k=len(prot2))
+print(prot2)
+print(random_prot2)
+random_counts2 = contact_filter(random_prot2, CT2, args.filterdistance)
+counts2 = contact_filter(prot2, CT2, args.filterdistance)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
+

From 3c1c46a3994ba6bc832a6d7cb1ec91185dfc1207 Mon Sep 17 00:00:00 2001
From: tmsmiller <1tmsmiller@gmail.com>
Date: Fri, 5 Dec 2025 14:25:22 -0600
Subject: [PATCH 8/8] Create README.md

---
 msDMS_Saleem&Miller_2025/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 msDMS_Saleem&Miller_2025/README.md

diff --git a/msDMS_Saleem&Miller_2025/README.md b/msDMS_Saleem&Miller_2025/README.md
new file mode 100644
index 0000000..2320fb3
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/README.md
@@ -0,0 +1 @@
+Code relevant to the msDMS-MaP manuscript that cannot be found in other Mustoe Lab repositories. Mainly one-off analyses for figures.