diff --git a/msDMS_Saleem&Miller_2025/2b+c_final.py b/msDMS_Saleem&Miller_2025/2b+c_final.py
new file mode 100644
index 0000000..f90b8ef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2b+c_final.py
@@ -0,0 +1,133 @@
+from ReactivityProfile import ReactivityProfile as rprof
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+
+def reactivityByNt(self, resnums = None, nts=None, name = None):
+        """ return a list of reactivities for a given set of nts (array), or nt type"""
+        
+        pro = self.profile(name)
+        
+        mask = np.isfinite(pro)
+        #with np.errstate(invalid='ignore'):
+        #    mask = mask & (pro > -0.3) & (pro < 4.0)
+
+        try:
+            ntit = iter(nts)
+            ntmask = (self.sequence == next(ntit))
+            for n in ntit:
+                ntmask = ntmask | (self.sequence == n) 
+            
+            mask = mask & ntmask
+        except TypeError:
+            pass
+
+        try:
+            resnums = set(resnums)
+            mask = mask & np.array([i in resnums for i in self.nts])
+        except TypeError:
+            pass
+
+        return pro[mask]
+
+
+def mutHistogram(self, name = None,name_2 = None, name_3=None, nts = None, resnums = None,
+                     bins=100,axes = None, pool=False, writename='mutHist.pdf', **kwargs):
+        
+
+        
+        write = False
+        if axes is None:
+            fig, axes = plot.subplots()
+            write = True
+
+        if pool:
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+            
+            bins=np.histogram(np.hstack((rxn,rxn_2,rxn_3)), bins=(np.arange(0.0, 10)))[1]
+            
+            axes.hist(rxn, histtype = 'step', bins = bins, label='InCell', **kwargs)
+            
+            axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+            
+            axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+        
+        else:
+        
+            rxn = ((self.reactivityByNt(name='norm', nts='G', resnums = resnums)))        
+            axes.hist(rxn, histtype = 'step', bins = bins, label='In Cell', **kwargs)
+
+            if name is not None:
+
+                rxn_2 = ((name.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_2, histtype= 'step', bins=bins, label='Cell Free', **kwargs)
+        
+            if name_2 is not None:
+
+                rxn_3 = ((name_2.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_3, histtype= 'step', bins=bins, label='Urea', **kwargs)
+            if name_3 is not None:
+
+                rxn_4 = ((name_3.reactivityByNt(name='norm', nts='G', resnums = resnums)))
+
+                axes.hist(rxn_4, histtype= 'step', bins=bins, label='Cell Free -Mg', **kwargs)
+
+
+        
+
+        axes.legend(loc='upper left')
+        axes.set_ylabel('Count')
+        axes.set_xlabel('Norm N7 reactivity')
+        axes.set_xlim(-3.0, 3.0)
+        axes.set_ylim(0,350)
+        axes.yaxis.set_ticks(np.arange(0, 375, 25))
+        axes.set_title('23s')
+
+        if write:
+            plot.savefig(writename)
+
+
+
+
+def winsorize(prof):
+    for i, v in enumerate(prof):
+        if prof[i] > 3:
+            prof[i] = 3
+        if prof[i] < -3:
+            prof[i] = -3
+
+    return prof
+
+
+if __name__ == '__main__':
+    ntorder = ('A','C','G','U')
+
+    masknt = ('A', 'C', 'U')
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('--incell')
+    argparser.add_argument('--cf')
+    argparser.add_argument('--cfnomg')
+    argparser.add_argument('--urea')
+    args = argparser.parse_args()
+    #read in reactivity profiles
+    incell = rprof(args.incell)
+    cellfree = rprof(args.cf)
+    cf_nomg = rprof(args.cfnomg)
+    urea = rprof(args.urea)
+
+    incell.normprofile = winsorize(incell.normprofile)
+    cellfree.normprofile = winsorize(cellfree.normprofile)
+    urea.normprofile = winsorize(urea.normprofile)
+    cf_nomg.normprofile= winsorize(cf_nomg.normprofile)
+
+    #pass in incell first, then denatured
+    mutHistogram(incell,cellfree, urea,cf_nomg, bins=np.linspace(-3.0, 3.0, num=25),writename ='Fig2B_23s_InCellvsCFvsUrea_norm_top_08052024.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/2d_final.py b/msDMS_Saleem&Miller_2025/2d_final.py
new file mode 100644
index 0000000..31e4df4
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/2d_final.py
@@ -0,0 +1,75 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import argparse
+
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7prof')
+argparser.add_argumnet('--sasa')
+args = argparser.parse_args()
+
+
+
+n7 = rprofile(args.n7prof)
+
+infile = args.sasa
+
+
+# read in the sasa file output from pymol script
+sasa = {}
+with open(infile) as inp:
+    for line in inp:
+        spl = line.split()
+        sasa[int(spl[0])] = float(spl[1])
+
+
+prot_cut = 1.6
+sasa_cut = 1
+
+x = 0
+protected = {}
+cfprotected = {}
+while x < len(n7.normprofile):
+    ##loop through the normprofile and take any normalized values above what we consider "protected"
+    if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+        protected[n7.nts[x]] = n7.normprofile[x]
+    x += 1
+
+
+positives = {}
+negatives = {}
+for i in sasa:
+        ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+        if sasa[i] >= sasa_cut:
+            negatives[i] = sasa[i]
+        ##if measured area is lower than our cutoff it is protected and therefore "positive"
+        if sasa[i] < sasa_cut:
+            positives[i] =  sasa[i]
+
+TP = 0
+FP = 0
+TN = 0
+FN = 0
+for i in sasa:
+    ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+    if i in protected and i in positives:
+        TP += 1
+    if i in protected and i in negatives:
+        FP += 1
+    if i in positives and i not in protected:
+        FN += 1
+    if i in negatives and i not in protected:
+        TN += 1
+
+print(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+print('Sensitivity: {}\n'.format(TP/(TP+FN)))
+print('Specificity: {}\n'.format(TN/(TN+FP)))
+print('PPV: {}\n'.format(TP/(TP+FP)))
+print('NPV: {}\n'.format(TN/(TN+FN)))
+print('Diagnostic Accuracy: {}'.format((TP + TN)/(TP + FP + FN + TN))) 
+print('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+print(len(sasa))
+print(TP+FP+TN+FN)
+
diff --git a/msDMS_Saleem&Miller_2025/3b_final.py b/msDMS_Saleem&Miller_2025/3b_final.py
new file mode 100644
index 0000000..51e1989
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3b_final.py
@@ -0,0 +1,80 @@
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--real', required=True, help='path to profile')
+parser.add_argument('--random', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--random2', required=True, help='2nd random dist')
+parser.add_argument('--real2', required=True, help='2nd real dist')
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def parse_infile(input_f):
+    nts = {}
+    with open(input_f) as inp:
+        for i,line in enumerate(inp):
+            if i == 0:
+                continue
+            else:
+                spl = line.split()
+                if spl[2] == '0.0':
+                    continue
+                else:
+                    nts[int(spl[0])] = int(spl[1])
+    return nts
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within 20A')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+random_counts = parse_infile(args.random)
+counts = parse_infile(args.real)
+random_counts2 = parse_infile(args.random2)
+counts2 = parse_infile(args.real2)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/3d_calcs_final.py b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
new file mode 100644
index 0000000..944f202
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/3d_calcs_final.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_prot_nts(fname):
+    sizes = []
+    with open(fname) as inp:
+        for line in inp:
+            sizes.append(int(line.strip()))
+    return sizes
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    return p_locus
+
+def sample_fold(size_l,loci_nts, rna):
+    rna_copy = copy.deepcopy(rna)
+    length = len(rna_copy)
+    percent_cover = []
+    encapsulated = 0
+    selected = {}
+    for i in size_l:
+        nts_in = []
+        selected[i] = set()
+        while len(selected[i]) == 0:
+            prospective = set()
+            start = random.choice(rna_copy)
+            end = start + i
+            if end > length:
+                continue
+            for j in range(start, end + 1):
+                prospective.add(j)
+
+
+            if prospective.issubset(set(rna_copy)):
+                selected[i] = prospective
+        for j in selected[i]:
+            rna_copy.remove(j)
+            if j in loci_nts:
+                nts_in.append(j)
+        cover = len(nts_in) / len(selected[i])
+        percent_cover.append(cover)
+        if cover == 1.0:
+            encapsulated += 1
+
+    return encapsulated
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0,1,2,3,4,5,6,7,8,9,10],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=6, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('# of SFD encapsulated')
+    ax.set_xticks(range(10))
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+if __name__ == '__main__':
+    #1542 16s
+    #2904 23s
+    prot_nts = read_prot_nts('23s_selffoldingnts.txt')
+    prim_nts = read_primary_nts('23s_primarynts.txt')
+    #prot_nts = read_prot_nts('16s_selffoldingnts.txt')
+    #prim_nts = read_primary_nts('16s_primarynts.txt')
+    ssu = []
+    lsu = []
+    
+    #for i in range(1, 1542 + 1):
+    #    ssu.append(i)
+    
+    for i in range(1, 2904 + 1):
+        ssu.append(i)
+
+    #lsu_en = []
+    ssu_en = []
+
+    while len(ssu_en) < 10000:
+        ssu_en.append(sample_fold(prot_nts, prim_nts, ssu))
+
+    hit = []
+    for i in ssu_en:
+        if i == 6:
+            hit.append(i)
+    print(len(hit)/ len(ssu_en))
+    histogram(ssu_en, title='23s', name='23s_10k_sfd_hist.pdf')
+    
diff --git a/msDMS_Saleem&Miller_2025/4c+e.py b/msDMS_Saleem&Miller_2025/4c+e.py
new file mode 100644
index 0000000..d625780
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/4c+e.py
@@ -0,0 +1,367 @@
+from sklearn.metrics import roc_curve, roc_auc_score
+
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+import matplotlib as mpl
+import matplotlib.pyplot as plot
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] ='Arial'
+
+import sys, os, math
+import argparse
+
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+parser.add_argument('--inputRings', required=True, help='path to ringfile')
+parser.add_argument('--adjustFrame', type=int, default=0, help='adjust frame of profile to match crystal structure if necessary')
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ringType', choices=('N1', 'N1N7', 'N7'), required=True, help='type of rings to plot')
+parser.add_argument('--crystalStart', type=int, required=True, help='first position of the RNA chain')
+parser.add_argument('--crystalEnd', type=int, required=True, help='last position of the RNA chain')
+parser.add_argument('--outputName', required=True, help='Output prefix for pymol session')
+parser.add_argument('--contactFilter', type=int, default=0, help='contact filter distance')
+parser.add_argument('--concatenated', action='store_true', help='plot all types of rings')
+parser.add_argument('--filterNegative', action='store_true', help='filter negative rings')
+parser.add_argument('--ctFile', required=True, help='path to ct file')
+parser.add_argument('--APCcutoff', type=int, default=2, help='filter rings with a score lower than or equal to this number')
+parser.add_argument('--approx', action='store_true', help='draw ring to closest nt if nt is missing from crystal structure')
+parser.add_argument('--renderall', action='store_true', help='render all the rings if a total concat file is passed')
+parser.add_argument('--distanceFilter', type=int, default=99999, help='check average contact distance of things within filter(angstroms)')
+args = parser.parse_args()
+
+
+
+'''
+tool for parsing ring data and drawing it onto a crystal structure
+
+takes a profile, ringfile
+takes a chain of interest for a crystal structure
+takes first and last residue numbers of that chain to get things in frame
+takes a frameshift integer
+'''
+
+cmd.load(args.pdbfile)
+
+profile = rprofile(args.profile)
+
+rings = []
+residues = []
+
+def adjust_frame(profile, ring_list, difference):
+    profile.nts = [x + difference for x in profile.nts]
+    for i in ring_list:
+        
+        i[0] += difference
+        i[1] += difference
+
+
+def parse_ringmap(cutoff, name='', filterneg=False):
+    rings = []
+    with open(name) as inp:
+        for p, line in enumerate(inp):
+            if p == 0 or p == 1:
+                continue
+            else:
+                split = line.split()
+                #split[2] for sig split[9] for alpha
+                if float(split[9]) <= cutoff:
+                    continue
+                    
+                if filterneg:
+                    if split[3] == '-1':
+                        
+                        continue
+                rings.append([int(split[0]), int(split[1]), float(split[9])])
+    return rings
+
+def contact_filter(inrings, filter_distance=0, ctfile=None):
+    
+    rings = []
+    CT1 = None
+    if ctfile is not None:
+        CT1 = RNAtools.CT(ctfile)
+    
+    for i in inrings:
+            #print(CT1.contactDistance(i[0], i[1]))
+            #print(str(i[0]) + ' ' + str(i[1]))
+            if CT1.contactDistance(i[0], i[1]) <= filter_distance:
+                #print('filtered')
+                continue
+            else:
+                
+                i.append(CT1.contactDistance(i[0],i[1]))
+                print("contact filter add {}".format(i))
+                rings.append(i)
+
+    return rings
+
+def split_concat(concat_list, profile):
+    last =  profile.nts[-1]
+    n1_rings = []
+    n1n7_rings = []
+    n7_rings = []
+    for i in concat_list:
+        if i[0] > last and i[1] > last:
+            i[0] -= last
+            i[1] -= last
+            n7_rings.append(i)
+        elif i[0] < last and i[1] < last:
+            n1_rings.append(i)
+        else:
+            n1n7_rings.append(i)
+            if i[0] > last:
+                i[0] -= last
+            else:
+                i[1] -= last
+    print(len(n1_rings), len(n1n7_rings), len(n7_rings))
+    return(n1_rings, n1n7_rings, n7_rings)
+
+def closest(lst, K):
+     
+    return lst[min(range(len(lst)), key = lambda i: abs(lst[i]-K))]
+
+
+def draw(ring_list, chain, crystalstart, crystalend, approx, ringtype, render=False, distancefilter=0):
+    distances = []
+    scores = []
+
+    
+    #get all the residues in a chain. some crystal structures are missing residues in the middle
+    
+    dchain = 'chain {}'.format(chain)
+    cmd.iterate(dchain,'residues.append(int(resi))')
+    fresidues = [*set(residues)]
+    #print('residues {}'.format(fresidues))
+    
+    
+    if ringtype == 'N1':
+        colors = [(44,123,182), (44,123,182), (171,217,233), (255,255,255), (253,174,97), (215,25,28), (215,25,28)]
+    else:
+        colors = [(91, 218, 190), (91, 218, 190), (0, 95, 154), (255, 255, 255), (218, 91, 172), (83, 48, 95), (83, 48, 95)]
+
+    #change to (20,100) for sig
+    cutoffs = [-1e10, -5, -2, 0, 2, 5, 1e10]
+    #alpha = [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
+    gradiant = [False, True, False, False, True, False]
+
+    for v,i in enumerate(ring_list):
+        linestart = i[0]
+        startstr = 'start_{}'.format(v+1)
+        endstr = 'end_{}'.format(v+1)
+        lineend = i[1]
+
+        
+        if linestart not in fresidues:
+            if approx:
+                linestart = closest(fresidues, linestart)
+            else:
+                continue
+        if lineend not in fresidues:
+            if approx:
+                lineend = closest(fresidues, lineend)
+            else:
+                continue
+
+        if linestart  <= crystalstart or lineend <= crystalstart:
+            
+            continue
+        elif linestart >= crystalend or lineend >= crystalend:
+            
+            continue
+
+        else:
+            distance = cmd.get_distance(atom1='chain {} and resi {} and name P'.format(chain, linestart), atom2='chain {} and resi {} and name P'.format(chain, lineend))
+            
+            if distance < distancefilter:
+                try:
+                    print(i)
+                    scores.append(i[3] / distance)
+                except:
+                    print('FAIL at ring {}'.format(i))
+            
+
+            distances.append(distance)
+            
+            for ind in range(len(cutoffs) - 1):
+                if cutoffs[ind] < i[2] <= cutoffs[ind + 1]:
+                    if gradiant[ind]:
+                        calccolor = colorGrad(i[2], colors[ind], colors[ind + 1], cutoffs[ind], cutoffs[ind + 1])
+                    else:
+                        calccolor = colors[ind]
+
+            cmd.set_color("color {}".format(v+1),calccolor)
+
+            cmd.select(startstr, 'chain {} and resi {} and name P'.format(chain, linestart))
+            start = cmd.get_coords('chain {} and resi {} and name P'.format(chain, linestart), 1)
+            
+            cmd.pseudoatom('points', name='start_{}'.format(v+1), pos=tuple(start[0]), color="color {}".format(v+1))
+        
+            cmd.deselect()
+        
+            cmd.select(endstr, 'chain {} and resi {} and name P'.format(chain, lineend))
+            end = cmd.get_coords('end_{}'.format(v+1), 1)
+            cmd.pseudoatom('points', name='end_{}'.format(v+1), pos=tuple(end[0]), color="color {}".format(v+1))
+
+            cmd.deselect()
+            if render:
+                print('rendering rings')
+                cmd.bond('points and name start_{}'.format(v+1), 'points and name end_{}'.format(v+1))
+
+    print("scores {}".format(scores))
+    try:
+        print("average score {}".format(sum(scores)/len(scores)))
+    except:
+        print('no rings in category')
+    return distances
+
+
+def filter_unprot_pos(ring_list, gaprofile):
+    '''
+    should be run after adjust_frame if applicable
+    '''
+
+    normvalues = {}
+    for i, v in enumerate(gaprofile.nts):
+        if np.isfinite(gaprofile.normprofile[i]):
+            normvalues[v] =  gaprofile.normprofile[i]
+
+    for i in ring_list:
+        if i[0] in normvalues and normvalues[i[0]] <= 2:
+            ring_list.remove(i)
+        if i[1] in normvalues and normvalues[i[1]] <= 2:
+            ring_list.remove(i)
+
+    return ring_list
+
+
+def filter_sig(ring_list, cutoff):
+    for i in ring_list:
+        if float(i[3]) < cutoff:
+            ring_list.remove(i)
+    return ring_list
+
+def filter_unconcat(concat_list, unconcat_list):
+    print('concat length {}'.format(len(concat_list)))
+    num_matches = 0
+    
+    non_matches = []
+    original = concat_list.copy()
+    for i in unconcat_list:
+        if i in original:
+            num_matches += 1
+            concat_list.remove(i)
+            
+        elif i not in original:
+            non_matches.append(i)
+    
+
+    for i in non_matches:
+        i[0] += 1
+    for i in non_matches:
+        if i in concat_list:
+            num_matches += 1
+    print(concat_list, '\n')
+    print(non_matches)
+    print('{} matches'.format(num_matches))
+
+    print(len(unconcat_list))
+    print("{} filtered length".format(len(concat_list)))
+    return concat_list
+
+
+
+def dist_histogram(dist_list, bins=10, title='',name='dist_hist.pdf'):
+    fig, ax = plot.subplots()
+    label = ''
+    colors = ['blue', 'orange', 'green']
+    for i, v in enumerate(dist_list):
+        if len(v) <= 0:
+            continue
+        if i == 0:
+            label = "N7"
+        elif i == 1:
+            label = 'N1'
+        elif i == 2:
+            label = "N7"
+        
+        ax.hist(v, histtype = 'step', bins = bins, label=label)
+        ax.axvline(x=np.percentile(v, [50]), color=colors[i],label=label + " median " + str(np.percentile(v, [50])))
+        ax.axvline(x=np.mean(v), color=colors[i],label=label + " mean " + str(np.mean(v)))
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('P-P Distance A')
+    ax.set_xlim(0, 70)
+    
+    #ax.set_ylim(20,150)
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def colorGrad(value, colorMin, colorMax, minValue, maxValue, log=False):
+        """ return an interpolated rgb color value """
+        
+        if log:
+            value = 10**-value
+            minValue = 10**-minValue
+            maxValue = 10**-maxValue
+
+        if value > maxValue:
+            return colorMax
+        elif value < minValue:
+            return colorMin
+        else:
+            v = value - min(maxValue, minValue)
+            v /= abs(maxValue-minValue)
+        
+            col = []
+            for i in range(3):
+                col.append( v*(colorMax[i] - colorMin[i]) + colorMin[i] )
+            
+            return col
+    
+
+rings = parse_ringmap(args.APCcutoff, args.inputRings, args.filterNegative)
+N1_rings, N1N7_rings, N7_rings = split_concat(rings, profile)
+
+if args.concatenated:
+    args.ringType= 'N7'
+    N7_rings.extend(N1N7_rings)
+    #N1_rings.extend(N7_rings)
+    pass
+
+rings_toplot = []
+if args.ringType == 'N7':
+    rings_toplot = N7_rings.copy()
+elif args.ringType == 'N1N7':
+    rings_toplot = N1N7_rings.copy()
+else:
+    rings_toplot = N1_rings.copy()
+
+print('precf {}'.format(rings_toplot))
+if args.contactFilter > 0:
+    rings_toplot = contact_filter(rings_toplot, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1_rings =  contact_filter(N1_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N1N7_rings = contact_filter(N1N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+    N7_rings =  contact_filter(N7_rings, filter_distance=args.contactFilter, ctfile=args.ctFile)
+
+
+
+
+adjust_frame(profile, rings_toplot, args.adjustFrame)
+dist_data = []
+dist_data1 = draw(rings_toplot, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=True)
+dist_data2 = draw(N1_rings, args.chain, args.crystalStart, args.crystalEnd, args.approx, args.ringType, render=False)
+dist_data.append(dist_data1)
+dist_data.append(dist_data2)
+
+dist_histogram(dist_data, bins=np.linspace(0,50,10), title='{} {}cf filterneg={} rings'.format(args.inputRings, args.contactFilter, str(args.filterNegative)), name='{} {}cf filterneg={} rings.pdf'.format(args.inputRings, args.contactFilter, str(args.filterNegative)))
+plot.close('all')
+cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/Figure_1B_final.py b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
new file mode 100644
index 0000000..188bfef
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/Figure_1B_final.py
@@ -0,0 +1,42 @@
+import argparse
+from ReactivityProfile import ReactivityProfile as rprofile
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--ssu_old')
+argparser.add_argument('--ssu_new')
+argparser.add_argument('--lsu_old')
+argparser.add_argument('--lsu_new')
+argparse.add_argument('--out')
+args = argparser.parse_args()
+small_old = rprofile(args.ssu_old)
+large_old = rprofile(args.lsu_old)
+small_new = rprofile(args.ssu_new)
+large_new = rprofile(args.lsu_new)
+
+
+old = [small_old.rawprofile[52], large_old.rawprofile[72]]
+new = [small_new.rawprofile[52],large_new.rawprofile[72]]
+
+
+xaxis = ['16s', '23s']
+barcount = 2
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,new, width, label='new')
+ax.bar(ind+width , old, width, label='old')
+ax.set_xticks(ind + width/2, ('16s', '23s'))
+
+
+ax.set_title('m7g reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/README.md b/msDMS_Saleem&Miller_2025/README.md
new file mode 100644
index 0000000..2320fb3
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/README.md
@@ -0,0 +1 @@
+Code relevant to the msDMS-MaP manuscript that cannot be found in other Mustoe Lab repositories. Mainly one-off analyses for figures.
diff --git a/msDMS_Saleem&Miller_2025/SI1_C_final.py b/msDMS_Saleem&Miller_2025/SI1_C_final.py
new file mode 100644
index 0000000..ec7bf1f
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SI1_C_final.py
@@ -0,0 +1,46 @@
+import sys
+from ReactivityProfile import ReactivityProfile
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import argparse
+mpl.use('Agg')
+mpl.rcParams['font.family'] = 'sans-serif'
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prof1 = ReactivityProfile(args.prof1)
+prof2 = ReactivityProfile(args.prof2)
+
+fig, ax = plt.subplots()
+
+ax.step(prof1.nts, (prof1.rawprofile * 100), label=args.prof1, where='mid', linewidth=2, linestyle='-')
+ax.step(prof1.nts, (prof2.rawprofile * 100), label=args.prof2, where='mid', linewidth=2)
+
+#52 for ssu 72 for lsu
+seq =  [str(i) for i in prof1.sequence]
+#seq[72] = 'm7G'
+seq[52] = 'm7G'
+
+ax.set_xlabel('Nucleotide', fontsize=20)
+ax.set_xticks(prof1.nts)
+ax.set_xticklabels(seq)
+
+#ax.set_xlim([65,82])
+ax.set_xlim([44,64])
+
+
+ax.set_ylabel('Mutation rate(%)', fontsize=20)
+ax.set_ylim([0, 25])
+
+ax.legend()
+fig.tight_layout()
+
+fig.savefig(args.out)
+
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
new file mode 100644
index 0000000..3186c5a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1b_final.py
@@ -0,0 +1,108 @@
+from ReactivityProfile import ReactivityProfile as rp
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+from matplotlib.ticker import AutoMinorLocator
+
+def get_data(listofprofs, subunit):
+    m7gs_mn = []
+    bgs_mn = []
+    if subunit == 'large':
+        x = 72
+    else:
+        print('smallsub')
+        x = 52
+    for i in listofprofs:
+        print(i.rawprofile)
+        m7gs_mn.append(i.rawprofile[x])
+        allreactivities = 0
+        for j in i.rawprofile:
+            if np.isfinite(j):
+                allreactivities += j
+        allreactivities -= i.rawprofile[x]
+        bg = allreactivities / (len(i.rawprofile) - 1)
+        bgs_mn.append(bg)
+    return m7gs_mn, bgs_mn
+
+
+Original_16s = rp('ph83_16s_16sm7_profile.txt')
+half_mn_16s = rp('05mM_16sm7_profile.txt')
+two_mn_16s =rp('2mM_16sm7_profile.txt')
+two5_mn_16s = rp('25ph83vs2ph83_16sm7_profile.txt')
+three_mn_16s = rp('3mM_16sm7_profile.txt')
+
+
+ph75_16s = rp('ph75_16sm7_profile.txt')
+ph8_16s = rp('ph8_16sm7_profile.txt')
+ph85_16s = rp('ph85_16sm7_profile.txt')
+finalcondition_16s = rp('2mmph9_16sm7_profile.txt')
+
+Original_23s = rp('ph83_23sm7_profile.txt')
+half_mn_23s = rp('05mM_23sm7_profile.txt')
+two_mn_23s = rp('2mM_23sm7_profile.txt')
+two5_mn_23s = rp('25mM_23sm7_profile.txt')
+three_mn_23s = rp('3mM_23sm7_profile.txt')
+
+ph75_23s = rp('ph75_23sm7_profile.txt')
+ph8_23s = rp('ph8_23sm7_profile.txt')
+ph85_23s = rp('ph85_16sm7_profile.txt')
+finalcondition_23s = rp('2mmph9_23sm7_profile.txt')
+
+ssu_manganese_m7gs, ssu_manganese_bgs = get_data([half_mn_16s, Original_16s, two_mn_16s, two5_mn_16s, three_mn_16s], '')
+ssu_ph_m7gs, ssu_ph_bgs = get_data([ph75_16s, ph8_16s, Original_16s, ph85_16s], '')
+ssu_final_m7g, ssu_final_bg = get_data([finalcondition_16s], '')
+
+lsu_manganese_m7gs, lsu_manganese_bgs = get_data([half_mn_23s, Original_23s, two_mn_23s, two5_mn_23s, three_mn_23s], 'large')
+lsu_ph_m7gs, lsu_ph_bgs = get_data([ph75_23s, ph8_23s, Original_23s, ph85_23s], 'large')
+lsu_final_m7g, lsu_final_bg = get_data([finalcondition_23s], 'large')
+
+
+manganese_m7gs = [np.mean(i) for i in zip(ssu_manganese_m7gs,lsu_manganese_m7gs)]
+ph_m7gs = [np.mean(i) for i in zip(ssu_ph_m7gs,lsu_ph_m7gs)]
+final_m7g = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_m7g)]
+
+
+manganese_bgs = [np.mean(i) for i in zip(ssu_manganese_bgs,lsu_manganese_bgs)]
+ph_bgs = [np.mean(i) for i in zip(ssu_ph_bgs,lsu_ph_bgs)]
+final_bg = [np.mean(i) for i in zip(ssu_final_m7g,lsu_final_bg)]
+
+
+#add in empty data so final condition can be included in plot
+ph_m7gs.append(0.0)
+ph_bgs.append(0.0)
+
+labels_1 = ['0.5', '1', '2', '2.5', '3']
+labels_2 = ['7.5', '8.0', '8.3', '8.5', '9']
+labels_3 = ['2mM Mn pH 8.3', '1mM Mn pH 9', '2mM Mn pH 9']
+fig,ax = plot.subplots(1, 4, figsize=(10,2))
+
+#ax[2] = ax[0].twinx()
+#share yaxis
+ax[0].set_title('Manganese Concentration')
+ax[0].scatter(labels_1, manganese_m7gs)
+ax[0].plot(labels_1, manganese_m7gs)
+#add bg data
+ax[0].scatter(labels_1, manganese_bgs)
+ax[0].scatter(labels_1, [0.0,0.0,final_m7g[0],0.0,0.0])
+ax[0].plot(labels_1, manganese_bgs)
+ax[0].set_yscale('log', base=10)
+
+
+ax[1].set_title('Acidity')
+ax[1].scatter(labels_2, ph_m7gs)
+ax[1].plot(labels_2, ph_m7gs)
+#add bg data
+ax[1].scatter(labels_2, ph_bgs)
+ax[1].plot(labels_2, ph_bgs)
+ax[1].scatter(labels_2, [0.0, 0.0 ,0.0 ,0.0, final_m7g[0]])
+ax[1].set_yscale('log', base=10)
+#ax[3] = ax[1].twinx()
+
+
+
+
+fig.tight_layout()
+plot.savefig('SIFigure_1A_averages.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
new file mode 100644
index 0000000..461632e
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/SIfig1_1d_final.py
@@ -0,0 +1,27 @@
+import pandas as pd
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plt
+
+#df = pd.read_csv('2mmph9_Modified_16sm7_mutation_counts.txt',sep='\t',header=(0))
+df = pd.read_csv('ph9_Modified_23sm7_mutation_counts.txt',sep='\t',header=(0))
+
+#23s
+m7g = df.iloc[72]
+#16s
+#m7g = df.iloc[52]
+
+
+labels = 'GA', 'Other'
+data = [m7g['GA'], m7g['GT'] + m7g['GC'] + m7g['complex_insertion'] + m7g['complex_deletion'] + m7g['G_multinuc_mismatch']]
+
+fig, ax = plt.subplots()
+ax.pie(data, labels=labels, autopct='%1.1f%%')
+
+print(m7g['GA'])
+
+fig.tight_layout()
+plt.savefig('SIFigure_1C_23sfinalRT.pdf')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/figure1c_final.py b/msDMS_Saleem&Miller_2025/figure1c_final.py
new file mode 100644
index 0000000..c18bd38
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/figure1c_final.py
@@ -0,0 +1,35 @@
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+import matplotlib as mpl
+import numpy as np
+mpl.use('Agg')
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['font.sans-serif'] = 'Arial'
+from matplotlib import pyplot as plot
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sat')
+argparser.add_argument('--unsat')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+satTPP = rprofile(args.sat)
+unsatTPP = rprofile(args.unsat)
+
+fig, ax = plot.subplots()
+
+
+#manual bg sub
+unsat_man_bg = np.nan_to_num(unsatTPP.rawprofile[18:120]) - np.nan_to_num(unsatTPP.backprofile[18:120])
+sat_man_bg = np.nan_to_num(satTPP.rawprofile[18:120]) - np.nan_to_num(satTPP.backprofile[18:120])
+
+
+ax.step(satTPP.nts[18:120], unsat_man_bg, label='Unsaturated', where='mid')
+ax.step(satTPP.nts[18:120], sat_man_bg, label='Saturated', where='mid')
+
+
+
+ax.set_title('N7 reactivities')
+ax.legend(loc='upper right')
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/pymol_rmsd.py b/msDMS_Saleem&Miller_2025/pymol_rmsd.py
new file mode 100644
index 0000000..67d8e9d
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/pymol_rmsd.py
@@ -0,0 +1,52 @@
+from pymol import cmd
+import argparse
+from glob import glob as gb
+
+def get_models(prefix, dirpath):
+    models = []
+    for model in gb(dirpath + prefix + '*'):
+        if model.split('.')[-1] != 'cif':
+            continue
+        models.append(model)
+    #print(models)
+    return models
+
+def get_rmsd(reference_obj, predicted):
+    pred_obj = predicted.split('\\')[-1].split('.')[0]
+
+    print(reference_obj, pred_obj)
+    cmd.load(predicted, pred_obj)
+    #cmd.select(reference_obj)
+    #cmd.select(pred_obj)
+    out = cmd.align(reference_obj, pred_obj, cycles=0)
+    cmd.delete(pred_obj)
+    print(out[0], out[3])
+    return out[3]
+
+def write_rmsds(rmsd_dict, outname):
+    out = open(outname, 'w')
+    for model in rmsd_dict.keys():
+        out.write(model.split('\\')[-1].split('.')[0] + '\t' + str(rmsd_dict[model]) + '\n')
+
+def _rmsds_main(ref_struct, pred_prefix, output, directory):
+    model_list = get_models(pred_prefix, directory)
+    ref_obj = ref_struct.split('/')[-1].split('.')[0]
+    cmd.load(ref_struct, ref_obj)
+    rmsds = {}
+    for model in model_list:
+        rmsds[model] = get_rmsd(ref_obj, model)
+    write_rmsds(rmsds, output)
+
+if __name__ == '__main__':
+    argparser = argparse.ArgumentParser(description='Compare many structure predictions to a single pdb structure. Output is RMSD in angstroms for all atoms')
+    argparser.add_argument('--pred_dir', default='./', help='directory to search for predicted structures')
+    argparser.add_argument('--pred_prefix', required=True, help='prefix for all predicted structures to compare')
+    argparser.add_argument('--ref_struct', required=True, help='reference structure to compare against')
+    argparser.add_argument('--output', required=True, help='output filename including file ext e.g. (output.txt)')
+    args = argparser.parse_args()
+
+    _rmsds_main(args.ref_struct, args.pred_prefix, args.output, args.pred_dir)
+    
+
+
+
diff --git a/msDMS_Saleem&Miller_2025/pymol_sasa.py b/msDMS_Saleem&Miller_2025/pymol_sasa.py
new file mode 100644
index 0000000..248c3f3
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/pymol_sasa.py
@@ -0,0 +1,38 @@
+
+#__main__.pymol_argv = ['pymol','-qc']
+
+import sys
+import argparse
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--pdbfile')
+parser.add_argument('--chain')
+parser.add_argument('--output')
+
+args = parser.parse_args()
+
+radius = 4
+
+cmd.set('dot_solvent', 1)
+cmd.set('dot_density', 3)
+cmd.set('solvent_radius', radius)
+
+cmd.load(args.pdbfile)  
+
+
+
+rnumbers = []
+cmd.iterate('chain {} and resn G and name N7'.format(args.chain), 'rnumbers.append(resi)')
+
+out = open(args.output,'w')
+for i in rnumbers:
+    
+    if cmd.count_atoms('chain {} and resi {} and name N7'.format(args.chain, i)) == 1:
+        
+        v = cmd.get_area('chain {} and resi {} and name N7'.format(args.chain, i))
+        out.write('{} {}\n'.format(i,v))
+
+out.close()
+
+
diff --git a/msDMS_Saleem&Miller_2025/random_ringsample.py b/msDMS_Saleem&Miller_2025/random_ringsample.py
new file mode 100644
index 0000000..c97733a
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/random_ringsample.py
@@ -0,0 +1,103 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import random
+import copy
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+def read_ring(input_rings):
+    length = 0
+    n1rings = []
+    n7rings = []
+    with open(input_rings) as ringin:
+        
+        for i,line in enumerate(ringin):
+            if i > 1:
+                spl = line.split()
+                n1 = int(spl[0])
+                n2 = int(spl[1])
+                a = float(spl[9])
+                #print(a)
+                if a < 2:
+                    continue
+                else:
+                    if n1 <= length and n2 <= length: #filter n1 rings
+                        n1rings.append((n1,n2))
+                    else:
+                        n7rings.append((n1,n2))
+            else:
+                if i == 0:
+                    length = int(line.split()[0])
+
+    #print(n1rings)
+    return length,n1rings, n7rings
+
+def read_primary_nts(fname):
+    p_locus = []
+    with open(fname) as inp:
+        for line in inp:
+            spl = line.split('-')
+            for i in range(int(spl[0]), int(spl[1]) + 1):
+                p_locus.append(i)
+    print(p_locus)
+    return p_locus
+
+def histogram(en, label = '', title='',name='sfd_hist.pdf'):
+    fig, ax = plot.subplots()
+    ax.hist(en, histtype = 'bar', bins=[0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],label=label, align='left')
+    ax.axvline(x=np.percentile(en, [50]), label=label + " median " + str(np.percentile(en, [50])))
+    ax.axvline(x=np.mean(en), label=label + " mean " + str(np.mean(en)))
+    ax.axvline(x=.84, label='Observed')
+
+    
+    ax.set_ylabel('Count')
+    ax.set_xlabel('% rings between tertiary sites')
+
+    
+    ax.set_title(title)
+    ax.legend(loc='upper right')
+    fig.tight_layout()
+    plot.savefig(name)
+
+def sample_fold(rings,loci_nts, rna):
+    encapsulated = 0
+    for i in rings:
+        rna_copy = copy.deepcopy(rna)
+        nt1 = random.choice(rna_copy)
+        rna_copy.remove(nt1)
+        if nt1 - 1 in rna_copy:
+            rna_copy.remove(nt1 - 1)
+        if nt1 + 1 in rna_copy:
+            rna_copy.remove(nt1 + 1)
+        nt2 = random.choice(rna_copy)
+        #print(nt1, nt2)
+        if nt1 in loci_nts and nt2 in loci_nts:
+            encapsulated += 1
+    return encapsulated / len(rings)
+
+if __name__ == '__main__':
+    # tpp_tertnts.txt p546_tertnts.txt
+    loci = read_primary_nts('p546_tertnts.txt')
+
+    # p546_r2r3_intersect_shared_corrbuffer.txt satTPP_intersect_shared_corrbuffer.txt
+    rna_len,n1,n7 = read_ring('p546_r2r3_intersect_shared_corrbuffer.txt')
+
+    rna_nts = []
+    for i in range(1, rna_len + 1):
+        rna_nts.append(i)
+
+    samples = []
+    while len(samples) < 10000:
+        samples.append(sample_fold(n1, loci, rna_nts))
+    print(samples)
+
+    
+    hit = []
+    for i in samples:
+        if i >= 0.83: # 
+            hit.append(i)
+    print(len(hit)/ len(samples))
+    histogram(samples, title='P546', name='P546_10k_ringsample_hist.pdf')
+   
diff --git a/msDMS_Saleem&Miller_2025/si2_d+e_final.py b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
new file mode 100644
index 0000000..df692f5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_d+e_final.py
@@ -0,0 +1,67 @@
+from json.tool import main
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plot
+import sys
+import RNAtools2 as RNAtools
+import numpy as np
+from sklearn.metrics import roc_curve, roc_auc_score
+from ReactivityProfile import ReactivityProfile
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+import argparse
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1')
+argparser.add_argument('--prof2')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+profobj = ReactivityProfile(args.prof1)
+prof2obj = ReactivityProfile(args.prof2)
+ctobj = RNAtools.CT(args.ctfile)
+pairedlist = ctobj.pairedResidueList(False)
+
+def getNtLists(profile, pairedlist, ntype):
+    
+        react = []
+        ispaired = []
+        for i,v in enumerate(profile.subprofile):               
+            if v > -10 and profile.sequence[i] == ntype:
+                react.append(v)
+                ispaired.append( int((i+1) in pairedlist) )
+            
+        print(react)
+        return react, ispaired
+
+
+labels = ('A', 'C', 'G', 'U')
+scores = []
+scores2 = []
+
+fig,ax = plot.subplots(1, 4, figsize=(8,2))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(profobj, pairedlist, nt)
+    scores.append(roc_auc_score(p,r))
+
+for i,nt in enumerate(labels):
+    r, p = getNtLists(prof2obj, pairedlist, nt)
+    scores2.append(roc_auc_score(p,r))
+
+
+fig,ax = plot.subplots()
+
+barcount = 4
+ind = np.arange(barcount)
+width = 0.3
+
+fig, ax = plot.subplots()
+
+ax.bar(ind,scores, width, label='new')
+ax.bar(ind+width , scores2, width, label='old')
+ax.set_xticks(ind + width/2, labels)
+
+
+fig.tight_layout()
+plot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_g_final.py b/msDMS_Saleem&Miller_2025/si2_g_final.py
new file mode 100644
index 0000000..719f634
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_g_final.py
@@ -0,0 +1,60 @@
+import re
+import sys
+import argparse
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as pyplot
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--n7profile')
+argparser.add_argument('--ctfile')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+n7 = rprofile(args.n7profile)
+
+bgprof = n7.backprofile
+rawprof = n7.rawprofile
+bgsub = n7.subprofile
+normprof = n7.normprofile
+
+react, seq = [],[]
+x=0
+
+
+while x < (len(n7.sequence)):
+    seq.append(n7.sequence[x])
+    react.append(n7.subprofile[x])
+    x += 1
+
+
+ct = RNAtools.CT(args.ctfile)
+pairs = ct.pairedResidueList()
+
+s,r = [],[]
+
+# go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
+for i,v in enumerate(seq):
+    if v == 'G' and np.isfinite(react[i]):
+        r.append(react[i])
+        if i in pairs:
+            s.append(True)
+        else:
+            s.append(False)
+
+
+# compute ROC curve and plot, along with AUC
+tmp = roc_curve(s, r, drop_intermediate=False)
+pyplot.plot(tmp[0], tmp[1], label='Basepairing: {:.2f}'.format(roc_auc_score(s,r)))
+    
+
+pyplot.plot([0, 1], [0, 1], linestyle="--")
+pyplot.xlim([0.0, 1.0])
+pyplot.ylim([0.0, 1.05])
+pyplot.title('')
+pyplot.legend()
+
+pyplot.savefig(args.out)
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si2_h+i_final.py b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
new file mode 100644
index 0000000..450bf1c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_h+i_final.py
@@ -0,0 +1,238 @@
+import pandas as pd
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as plot
+import numpy as np
+import scipy.stats
+import math
+import itertools
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof1', required=True)
+argparser.add_argument('--prof2')
+argparser.add_argument('--out', required=True)
+args = argparser.parse_args()
+
+
+# read in the N7 reactivity file
+n7 = rprofile(args.prof1)
+n72 = rprofile(args.prof2)
+
+
+
+def reactivityByneighbors(self, add=None, GA=False, Triplet=False, pos=1, byNT=False, name=''):
+    '''
+    Takes a reactivity profile as an argument.
+    Plots reactivity rates based on neighbors. 
+    Pass a negative number to look upstream, positive for downstream. 
+    Will group purines and pyrimidines by default.
+    '''
+
+    def adjacent_values(vals, q1, q3):
+        upper_adjacent_value = q3 + (q3 - q1) * 1.5
+        upper_adjacent_value = np.clip(upper_adjacent_value, q3, vals[-1])
+
+        lower_adjacent_value = q1 - (q3 - q1) * 1.5
+        lower_adjacent_value = np.clip(lower_adjacent_value, vals[0], q1)
+        return lower_adjacent_value, upper_adjacent_value
+
+
+    def set_axis_style(ax, labels):
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_xlim(0.25, len(labels) + 0.75)
+        ax.set_xlabel('Sample name')
+
+    if add is not None:
+        self.subprofile = np.concatenate([self.subprofile, add.subprofile])
+        self.sequence = np.concatenate([self.sequence, add.sequence])
+
+    def stars(p):
+    ##get stars for annotating plots using a p value
+        if p < 0.000001:
+            return '******'
+        elif p < 0.00001:
+            return '*****'
+        elif p < 0.0001:
+            return "****"
+        elif (p < 0.001):
+            return "***"
+        elif (p < 0.01):
+            return "**"
+        elif (p < 0.05):
+            return "*"
+        else:
+            return "-"
+
+    if Triplet:
+            nts = ['A','G','U','C']
+            _triplets = {}
+            for i in nts:
+                for j in nts:
+                    for k in nts:
+                        _triplets[i + j + k] = []
+                
+            for n in nts:
+                for i, v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        pre = self.sequence[i - 1]
+                        nxt = self.sequence[i + 1]
+                        _triplets[pre + n + nxt].append(v)
+
+
+    elif not GA:
+            '''
+            Data is ordered (A) purines, pyrimidines then (G) purines, pyrimidines, etc.
+            
+            '''
+
+
+            nts = ['A','G','U','C']
+            data = []
+            labels = []
+            for n in nts:
+                A = []
+                G = []
+                T = []
+                C = []
+                purines = []
+                pyrimidine = []
+                for i,v in enumerate(self.subprofile):
+                    if np.isfinite(v) and self.sequence[i] == n:
+                        if byNT:
+                            if self.sequence[i + pos] == 'A':
+                                A.append(v)
+                            elif self.sequence[i + pos] == 'G':
+                                G.append(v)
+                            elif self.sequence[i + pos] == 'U':
+                                T.append(v)
+                            else:
+                                C.append(v)
+                        else:
+                            if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                                purines.append(v)
+                            else:
+                                pyrimidine.append(v)
+                if byNT:
+                    data.append(A.copy())
+                    data.append(G.copy())
+                    data.append(T.copy())
+                    data.append(C.copy())
+                    labels.append(str(n) + ' A')
+                    labels.append(str(n) + ' G')
+                    labels.append(str(n) + ' T')
+                    labels.append(str(n) + ' C')
+                else:
+                    data.append(purines.copy())
+                    data.append(pyrimidine.copy())
+                    labels.append(str(n) + ' Purines')
+                    labels.append(str(n) + ' Pyrimadines')
+
+    else:
+        if byNT:
+            A = []
+            G = []
+            T = []
+            C = []
+
+            for i, v in enumerate(self.subprofile):
+                if np.isfinite(v):
+                    if self.sequence[i + pos] == 'A':
+                        A.append(v)
+                    elif self.sequence[i + pos] == 'G':
+                        G.append(v)
+                    elif self.sequence[i + pos] == 'U':
+                        T.append(v)
+                    else:
+                        C.append(v)
+            labels = ['A','G','U','C']
+
+            data = [A,G,T,C]
+
+
+        else:
+                purines = []
+                pyrimidine = []
+                print('GA')
+                for i, v in enumerate(self.normprofile):
+                    if np.isfinite(v):
+                        if self.sequence[i + pos] == 'A' or self.sequence[i + pos] == 'G':
+                            purines.append(v)
+                        else:
+                            pyrimidine.append(v)
+
+                labels = ['purines', 'pyrimidines']
+                data = [purines, pyrimidine]
+    if not Triplet:
+        if GA:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(6,8))
+        elif not GA and not byNT:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(8,16))
+        else:
+            fig, ax = plot.subplots(nrows = 2, ncols = 1, figsize=(30,30))
+        quants = []
+        for i in data:
+            quants.append([.25,.75])
+
+
+        ax[0].hist(data[0], histtype = 'step', label='Purines')
+        ax[0].hist(data[1], histtype = 'step', label='Pyrimidines')
+        ax[0].legend()
+
+        for i in data:
+            quantiles = np.quantile(i, np.array([ 0.75]))
+            print(quantiles)
+
+        tabledata = [['Comparison', 'P value']]
+        for i,j in itertools.combinations(data, 2):
+
+                u_stat, p = scipy.stats.mannwhitneyu(i, j, alternative='two-sided')
+
+                p_value = p
+                print(p_value)
+                print('{}   {}'.format(data.index(i), data.index(j)))
+
+                #get max and min for putting the annotation in the right place
+                y_max = np.max(np.concatenate((i, j)))
+                y_min = np.min(np.concatenate((i, j)))
+
+                tabledata.append([labels[data.index(i)] + ' vs. ' + labels[data.index(j)], str(p_value)])
+
+            
+        ax[1].table(cellText=tabledata, loc='center' )
+
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+    else:
+        out = open('{}.txt'.format(name), 'w')
+        out.write('triplet\tmean_rep1\tmedian_rep1\ttriplet_count_rep1\n')
+        quants = []
+        labels = []
+        data = []
+        for i in _triplets:
+            print(i)
+            try:
+                quantiles = np.quantile(_triplets[i], np.array([0.5]))
+                mean = np.mean(_triplets[i])
+                out.write('{}\t{:.6}\t{:.6}\t{}\n'.format(i, mean, quantiles[0], len(_triplets[i])))
+                labels.append(i)
+                data.append(_triplets[i])
+                quants.append([.25,.75])
+            except:
+                continue
+
+        fig, ax = plot.subplots(nrows = 1, ncols = 1, figsize=(30,30))
+
+        ax.violinplot(data, showmedians=True, quantiles=quants)
+
+        ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+        ax.set_ylim(0,0.25)
+        ax.set_ylabel('BG sub reactivity')
+        fig.tight_layout(pad = 2)
+        fig.savefig(name + '.pdf')
+
+
+reactivityByneighbors(n7, add=n72, GA=True, byNT=False, Triplet=False, pos=1, name=args.out)
diff --git a/msDMS_Saleem&Miller_2025/si2_k_final.py b/msDMS_Saleem&Miller_2025/si2_k_final.py
new file mode 100644
index 0000000..d5be507
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si2_k_final.py
@@ -0,0 +1,67 @@
+
+import re
+import sys
+import argparse
+from sklearn.metrics import roc_curve, roc_auc_score
+import matplotlib.pyplot as pyplot
+import numpy as np
+
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--prof')
+argparser.add_argument('--sasa')
+argparser.add_argument('--out')
+args = argparser.parse_args()
+# read in the N7 reactivity file
+n7 = rprofile(args.prof)
+
+bgprof = n7.backprofile
+rawprof = n7.rawprofile
+bgsub = n7.subprofile
+normprof = n7.normprofile
+
+react, seq = [],[]
+x=0
+
+
+while x < (len(n7.sequence)):
+    seq.append(n7.sequence[x])
+    react.append(n7.normprofile[x])
+    x += 1
+
+
+# read in the sasa file output from pymol script
+sasa = {}
+with open(args.sasa) as inp:
+    for line in inp:
+        spl = line.split()
+        sasa[int(spl[0])-1] = float(spl[1])
+        
+print(sasa)
+# plot the data at several different cutoffs
+for cut in (1,2):
+    s,r = [],[]
+
+    # go through the RNA, only looking at Gs with defined reactivites (not NaN), check to see if the G measured is in the crystal structure as well
+    for i,v in enumerate(seq):
+        if v == 'G' and np.isfinite(react[i]) and i in sasa:
+            r.append(react[i])
+            s.append(int(sasa[i] < cut))
+    
+
+
+    # compute ROC curve and plot, along with AUC
+    tmp = roc_curve(s, r, drop_intermediate=False)
+    pyplot.plot(tmp[0], tmp[1], label='{}: {:.2f}'.format(cut, roc_auc_score(s,r)))
+    
+
+pyplot.plot([0, 1], [0, 1], linestyle="--")
+pyplot.xlim([0.0, 1.0])
+pyplot.ylim([0.0, 1.05])
+pyplot.title('')
+pyplot.legend()
+
+pyplot.savefig(args.out)
+
+
diff --git a/msDMS_Saleem&Miller_2025/si3_ac_calcs.py b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
new file mode 100644
index 0000000..c7c6603
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_ac_calcs.py
@@ -0,0 +1,96 @@
+import argparse
+import matplotlib.pyplot as plot
+import numpy as np
+import glob
+from ReactivityProfile import ReactivityProfile as rprofile
+
+argparser = argparse.ArgumentParser()
+argparser.add_argument('--sasa')
+argparser.add_argument('--subunit', choices=['16', '23'])
+argparser.add_argument('--out')
+args = argparser.parse_args()
+
+prot_cut = 1.6
+
+sasa_cut = 1
+
+infile = args.sasa
+
+
+out = open(args.out,'w')
+
+adjustframe = 0
+def adjust_frame(profile, difference):
+    profile.nts = [x + difference for x in profile.nts]
+
+#match end half of profile names
+false_p = []
+false_out = open('{}_false_negatives.txt'.format(sasa.split('.')[0]), 'w')
+for file in glob.glob("*ec{}S_profile.txtga".format(args.subunit)):
+    print(file)
+    n7 = rprofile(file)
+    adjust_frame(n7,adjustframe)
+
+    # read in the sasa file output from pymol script
+    sasa = {}
+    with open(infile) as inp:
+        for line in inp:
+            spl = line.split()
+            sasa[int(spl[0])] = float(spl[1])
+
+    x = 0
+    protected = {}
+    cfprotected = {}
+    while x < len(n7.normprofile):
+        ##loop through the normprofile and take any normalized values above what we consider "protected"
+        if np.isfinite(n7.normprofile[x]) and n7.normprofile[x] >= prot_cut and n7.sequence[x] == 'G' and n7.nts[x] in sasa:
+            protected[n7.nts[x]] = n7.normprofile[x]
+
+        x += 1
+
+
+    positives = {}
+    negatives = {}
+    for i in sasa:
+            ##if measured area is greater than our cutoff, it is unprotected and therefore a "negative"
+            if sasa[i] >= sasa_cut:
+                negatives[i] = sasa[i]
+            ##if measured area is lower than our cutoff it is protected and therefore "positive"
+            if sasa[i] < sasa_cut:
+                positives[i] =  sasa[i]
+
+    TP = 0
+    FP = 0
+    TN = 0
+    FN = 0
+    for i in sasa:
+        ##compare our dict of protected values against the positive and negatives at every position, tick up appropriate bin
+        if i in protected and i in positives:
+            TP += 1
+        if i in protected and i in negatives:
+            FP += 1
+            
+        if i in positives and i not in protected:
+            FN += 1
+            false_p.append(str(i))
+        if i in negatives and i not in protected:
+            TN += 1
+
+    out.write(' TP: {}\n FP: {}\n TN: {}\n FN: {}\n'.format(TP, FP, TN, FN))
+    out.write('Sensitivity: {}\n'.format(TP/(TP+FN)))
+    out.write('Specificity: {}\n'.format(TN/(TN+FP)))
+    out.write('PPV: {}\n'.format(TP/(TP+FP)))
+    out.write('NPV: {}\n'.format(TN/(TN+FN)))
+    out.write('Diagnostic Accuracy: {}\n'.format((TP + TN)/(TP + FP + FN + TN))) 
+    out.write('Protection cutoff: {}\nSASA cutoff:{}\n'.format(prot_cut,sasa_cut))
+    out.write(str(len(sasa)))
+    out.write('\n')
+    out.write(str(TP+FP+TN+FN))
+    out.write('\n')
+
+print(len(set(false_p)))
+for nt in set(false_p):
+    false_out.write(nt+' ')
+
+out.close()
+
diff --git a/msDMS_Saleem&Miller_2025/si3_f_final.py b/msDMS_Saleem&Miller_2025/si3_f_final.py
new file mode 100644
index 0000000..1b1232c
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si3_f_final.py
@@ -0,0 +1,69 @@
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import argparse
+from pymol import cmd
+
+def get_nts_profile(profile):
+    nt_list = []
+    prof = rprofile(profile)
+    for i,v in enumerate(prof.normprofile):
+        if np.isfinite(v) and v >= 1.6:
+            nt_list.append(i + 1)
+    return nt_list
+
+def adjust_frame(number, listnts):
+    adjustednts = []
+    for i in listnts:
+        adjustednts.append(i+number)
+    return adjustednts
+
+
+
+def get_nts(input_file):
+    nts = []
+    with open(input_file, 'r') as inp:
+        for i,line in enumerate(inp):
+            if i == 0 or i == 1:
+                continue
+            spl= line.split()
+            nts.append(int(spl[0]))
+    return nts
+
+def get_nt_list(inputfile):
+    with open(inputfile, 'r') as inp:
+        for line in inp:
+            nts = line.split()
+    return nts
+
+
+def color_nts(chain, nt_list):
+    cmd.set_color('protected', (83, 48, 95))
+    for G in nt_list:
+        try:
+            cmd.select('protected_nts', 'chain {} and resi {}'.format(chain, G), merge=1)
+            cmd.color('protected', 'chain {} and resi {}'.format(chain, G))
+        except:
+            print('Nt {} not in structure'.format(G))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--pdbfile', required=True, help='path to crystal structure')
+    parser.add_argument('--chain', type=str, required=True, help='RNA chain id in crystal structure')
+    parser.add_argument('--input', required=True, help='path to infile')
+    parser.add_argument('--outputName', required=True)
+    parser.add_argument('--adjustframe', type=int)
+    parser.add_argument('--profile', action="store_true")
+    args = parser.parse_args()
+    cmd.load(args.pdbfile)
+    #to_color = get_nts(args.input)
+    if not args.profile:
+        to_color = get_nt_list(args.input)
+    else:
+        to_color = get_nts_profile(args.input)
+    if args.adjustframe is not None:
+        to_color = adjust_frame(args.adjustframe,to_color)
+
+    color_nts(args.chain, to_color)
+
+    cmd.save(args.outputName + '.pse')
\ No newline at end of file
diff --git a/msDMS_Saleem&Miller_2025/si6_a_final.py b/msDMS_Saleem&Miller_2025/si6_a_final.py
new file mode 100644
index 0000000..b23d8b5
--- /dev/null
+++ b/msDMS_Saleem&Miller_2025/si6_a_final.py
@@ -0,0 +1,99 @@
+
+import random
+import numpy as np
+from ReactivityProfile import ReactivityProfile as rprofile
+import RNAStructureObjects as RNAtools
+
+import matplotlib.pyplot as plot
+import scipy.stats
+
+import sys, os, math
+import argparse
+plot.rcParams['pdf.fonttype'] = 42
+plot.rcParams['font.sans-serif'] = 'Arial'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--profile', required=True, help='path to profile')
+parser.add_argument('--ctfile', required=True)
+parser.add_argument('--profile2', required=True, help='path to profile')
+parser.add_argument('--ctfile2', required=True)
+parser.add_argument('--output', required=True, help='output prefix')
+parser.add_argument('--cutoff', type=int, default=3)
+parser.add_argument('--filterdistance', type=int, default=10)
+args = parser.parse_args()
+
+
+def contact_filter(protections,CT1, filter_distance=0):
+    
+    num_within_range = {}
+    for i in protections:
+        count = 0
+        for j in protections:
+            if i == j:
+                continue
+            if CT1.contactDistance(i, j) > filter_distance:
+                #print('filtered')
+                continue
+            else:
+                count += 1
+        num_within_range[i] = count
+    return num_within_range
+
+def violin_plot(name, labels, data, p):
+    quants = []
+    for i in data:
+        quants.append([.25,.75])
+
+    fig, ax = plot.subplots(nrows = 1, ncols = 1)
+    ax.violinplot(data, showmedians=True, quantiles=quants)
+    ax.set_xticks(np.arange(1, len(labels) + 1), labels=labels)
+    #ax.set_ylim(0, 40)
+    ax.set_ylabel('number of protections within CF 5')
+    ax.set_title('P = {}'.format(p))
+    fig.tight_layout(pad = 2)
+    fig.savefig(name + '.pdf')
+
+
+prof = rprofile(args.profile)
+CT = RNAtools.CT(args.ctfile)
+gs = []
+prot = []
+random_prot = []
+for i,v in enumerate(prof.normprofile):
+    if np.isfinite(v):
+        gs.append(i+1)
+    if v >= args.cutoff:
+        prot.append(i +1)
+
+random_prot = random.sample(gs, k=len(prot))
+print(prot)
+print(random_prot)
+random_counts = contact_filter(random_prot, CT, args.filterdistance)
+counts = contact_filter(prot, CT, args.filterdistance)
+
+prof2 = rprofile(args.profile2)
+CT2 = RNAtools.CT(args.ctfile2)
+gs2 = []
+prot2 = []
+random_prot2 = []
+for i,v in enumerate(prof2.normprofile):
+    if np.isfinite(v):
+        gs2.append(i+1)
+    if v >= args.cutoff:
+        prot2.append(i +1)
+
+random_prot2 = random.sample(gs2, k=len(prot2))
+print(prot2)
+print(random_prot2)
+random_counts2 = contact_filter(random_prot2, CT2, args.filterdistance)
+counts2 = contact_filter(prot2, CT2, args.filterdistance)
+
+x_labels = ['16s Observed', '23s real', '16s Random Distribution', '23s random']
+data_toplot = [list(counts.values()), list(counts2.values()), list(random_counts.values()), list(random_counts2.values())]
+u, p = scipy.stats.mannwhitneyu(list(counts.values()), list(random_counts.values()), alternative='two-sided')
+u2, p2 = scipy.stats.mannwhitneyu(list(counts2.values()), list(random_counts2.values()), alternative='two-sided')
+print(p)
+print(p2)
+
+violin_plot(args.output, x_labels, data_toplot, str(p) + '\t' + str(p2))
+