Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
STUDY_ID SUBJ_ID Age In Years Sex Assessment Date Language
GSE0DUB HCC2935 20 Female 09/15/2014
GSE0DUB HCC4006 20.5 08/31/2014
GSE0DUB HCC827 20.9 Male 10/01/2014 Spain
GSE0DUB NCIH1650 31.1 Female 10/01/2014
GSE0DUB NCIH1975 40 Female 10/10/2014
GSE0DUB NCIH3255 18 Male 09/17/2014 English
GSE0DUB PC14 23 Female 09/28/2014
GSE0DUB PC14 25 Male 09/28/2014
GSE0DUB SKMEL28 90 Female 10/12/2014 English
GSE0DUB SW48 11.5 09/28/2014
GSE0DUB SW48 12 09/28/2014
GSE0DIFFSID HCC2935 20 Female 09/15/2014
GSE0DIFFSID HCC4006 20.5 08/31/2014
GSE0DIFFSID HCC827 20.9 Male 10/01/2014 Spain
GSE0DIFFSID NCIH1650 31.1 Female 10/01/2014
GSE0DIFFSID NCIH1975 40 Female 10/10/2014
GSE0DIFFSID NCIH3255 18 Male 09/17/2014 English
GSE0DIFFSID PC14 23 Female 09/28/2014
GSE0DIFFSID PC14 25 Male 09/28/2014
GSE0DIFFSID SKMEL28 90 Female 10/12/2014 English
GSE0DIFFSID SW48 11.5 09/28/2014
GSE0DIFFSID SW48 12 09/28/2014
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
File Variable Variable Type N null Mean Median IQR Min Max SD Count Required Validation rule QC missing data QC data range
TST_DEMO.txt SUBJ_ID ID 9 0 Yes OK
TST_DEMO.txt Age In Years Numerical 9 0 30.555556 20.9 11.1 11.5 90.0 23.734843 Yes "Greater than 30, when ""Sex"" is equal to ""Male""; Lesser than 50; Greater than or equal to 20; Lesser than or equal to 20; >30; <50; >=20; <=20; 30-50; Between 30 to 50" OK "Range checks failed: >30 ('HCC2935', 'HCC4006', 'HCC827', 'NCIH3255', 'PC14', 'SW48'); 30-50 ('HCC2935', 'HCC4006', 'HCC827', 'NCIH3255', 'PC14', 'SKMEL28', 'SW48'); Between 30 to 50 ('HCC2935', 'HCC4006', 'HCC827', 'NCIH3255', 'PC14', 'SKMEL28', 'SW48'); Lesser than or equal to 20 ('HCC4006', 'HCC827', 'NCIH1650', 'NCIH1975', 'PC14', 'SKMEL28'); <=20 ('HCC4006', 'HCC827', 'NCIH1650', 'NCIH1975', 'PC14', 'SKMEL28'); Greater than 30, when ""Sex"" is equal to ""Male"" ('HCC827', 'NCIH3255'); Greater than or equal to 20 ('NCIH3255', 'SW48'); >=20 ('NCIH3255', 'SW48'); Lesser than 50 ('SKMEL28'); <50 ('SKMEL28')"
TST_DEMO.txt Sex Categorical 7 2 Female: 5, Male: 2 Yes 2 missing ('HCC4006', 'SW48')
TST_DEMO.txt Assessment Date Date 9 0
TST_DEMO.txt Language Text 3 6
TST001.txt SUBJ_ID ID 12 0 Yes OK
TST001.txt Mutant Allele (Genomic) Text 12 0
TST001.txt Mutant Allele (cDNA) Text 12 0
TST001.txt Mutation Type Text 12 0
TST001.txt Variant Type Text 12 0
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Path
import java.sql.SQLException
import java.util.concurrent.*

class ClinicalDataProcessor extends AbstractDataProcessor {
StatisticCollector statistic = new StatisticCollector()
def usedStudyId = ''
int THREAD_COUNT = 4

ClinicalDataProcessor(Object conf) {
super(conf)
Expand Down Expand Up @@ -188,9 +190,30 @@ class ClinicalDataProcessor extends AbstractDataProcessor {

mergeMode = getMergeMode(mappingFile)

final ExecutorService threadPool = Executors.newFixedThreadPool(THREAD_COUNT);
final ExecutorCompletionService<Object> completionService = new ExecutorCompletionService<>(threadPool);

mapping.eachFileMapping { fileMapping ->
this.processFile(sql, dir.resolve(fileMapping.fileName), fileMapping)
completionService.submit(new Callable<Object>() {
@Override
Object call() throws Exception {
sql.connection.autoCommit = false
processFile(sql, dir.resolve(fileMapping.fileName), fileMapping)
}
})
}

for (int i = 0; i< mapping.mappings.size(); i++){
final Future<Object> f = completionService.take()
try {
Object result = f.get()
} catch (ExecutionException e){
throw new DataProcessingException(e.getCause().message)
}
}

threadPool.shutdown()

mappingFileFound = true
}
if (!mappingFileFound) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ import java.util.concurrent.Future
class SNPDataProcessor extends AbstractDataProcessor {

int THREAD_COUNT = 4
SNPDataProcessor(Object conf) {
super(conf)
public SNPDataProcessor(Object conf) {
super(conf);
}

@Override
boolean processFiles(Path dir, Sql sql, studyInfo) {
public boolean processFiles(Path dir, Sql sql, studyInfo) {
database.truncateTable(sql, 'lt_src_mrna_subj_samp_map')
database.truncateTable(sql, 'lt_src_mrna_data')

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,15 @@ class ClinicalDataProcessorTest extends Specification implements ConfigAwareTest
Study.deleteById(config, clinicalData.studyId)

def expectedFile = new File(clinicalData.dir, 'ExpectedSummaryStatistic.txt')
def expectedFileAnotherOrder = new File(clinicalData.dir, 'ExpectedSummaryStatisticAnotherOrder.txt')
def actualFile = new File(clinicalData.dir, 'SummaryStatistic.txt')
actualFile.delete()
def result = clinicalData.load(config)

then:
assertThat("Clinical data loading shouldn't fail", result, equalTo(true))
actualFile.exists()
actualFile.readLines() == expectedFile.readLines()
actualFile.readLines() == expectedFile.readLines() || actualFile.readLines() == expectedFileAnotherOrder.readLines()
}

def "it should collect statistic"() {
Expand All @@ -73,7 +74,7 @@ class ClinicalDataProcessorTest extends Specification implements ConfigAwareTest

expect:
statistic != null
statistic.tables.keySet() as List == ['TST001.txt', 'TST_DEMO.txt']
statistic.tables.keySet() as List == ['TST001.txt', 'TST_DEMO.txt'] || statistic.tables.keySet() as List == ['TST_DEMO.txt', 'TST001.txt']
def demo = statistic.tables.'TST_DEMO.txt'
demo != null
demo.variables.keySet() as List == ['SUBJ_ID', 'Age In Years', 'Sex', 'Assessment Date', 'Language']
Expand Down