From 2602007dd5393839dee5b466444099400644e0a9 Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Tue, 19 Sep 2023 19:36:09 +0200 Subject: [PATCH 01/42] move vendor dir outside classmap dirs --- .gitignore | 2 +- composer.json | 3 +-- docker-compose.yml | 1 + docker-dev-up.sh | 4 ++-- engine/external/pear/HTML/Select.php | 2 +- engine/include.php | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index d9734d08..108b66b9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ engine/templates_c/* -engine/include/vendor +vendor config/config.local.php docker/www/config/config.local.php secured_data/ccls diff --git a/composer.json b/composer.json index c3f6e3c9..500edcf7 100644 --- a/composer.json +++ b/composer.json @@ -1,7 +1,6 @@ { "config": { - "platform": {"php":"5.6"}, - "vendor-dir": "engine/include/vendor" + "platform": {"php":"5.6"} }, "autoload": { diff --git a/docker-compose.yml b/docker-compose.yml index bdcc6585..de4fb4eb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,7 @@ services: - ./docker/www/config:/home/inforex/config - ./secured_data:/home/inforex/secured_data - ./phpunit:/home/inforex/phpunit + - ./vendor:/home/inforex/vendor depends_on: - db restart: always diff --git a/docker-dev-up.sh b/docker-dev-up.sh index 7ce37345..9f940b24 100755 --- a/docker-dev-up.sh +++ b/docker-dev-up.sh @@ -2,7 +2,7 @@ composer update -AUTOLOAD=engine/include/vendor/autoload.php +AUTOLOAD=vendor/autoload.php if [ -f $AUTOLOAD ]; then docker-compose build @@ -10,4 +10,4 @@ if [ -f $AUTOLOAD ]; then else echo -e "[\e[31mERROR\e[0m] $AUTOLOAD not found" echo -e "Make sure that '\e[32mcomposer\e[0m' is installed in order to run '\e[32mcomposer update\e[0m' and generate $AUTOLOAD" -fi \ No newline at end of file +fi diff --git a/engine/external/pear/HTML/Select.php b/engine/external/pear/HTML/Select.php index 47f4bd88..54dce9f1 100644 --- a/engine/external/pear/HTML/Select.php +++ b/engine/external/pear/HTML/Select.php @@ -13,7 +13,7 @@ * @link http://pear.php.net/package/HTML_Select */ -require_once __DIR__ . '/../../../include/vendor/autoload.php'; +require_once __DIR__ . '/../../../../vendor/autoload.php'; /** * Class to dynamically create an HTML SELECT diff --git a/engine/include.php b/engine/include.php index 2c43a4fa..189eba19 100644 --- a/engine/include.php +++ b/engine/include.php @@ -5,4 +5,4 @@ $include_paths[] = get_include_path(); set_include_path( implode(PATH_SEPARATOR, $include_paths) ); -require_once __DIR__ . '/include/vendor/autoload.php'; \ No newline at end of file +require_once __DIR__ . '/../vendor/autoload.php'; From f28b62646952847407111731e8a57d92e286c34d Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Thu, 21 Sep 2023 20:16:12 +0200 Subject: [PATCH 02/42] extract FileWriter class for export --- engine/include/export/ConllAndJsonFactory.php | 26 ++++++----- engine/include/export/FileWriter.php | 27 ++++++++++++ .../engine/include/export/FileWriterTest.php | 44 +++++++++++++++++++ 3 files changed, 86 insertions(+), 11 deletions(-) create mode 100644 engine/include/export/FileWriter.php create mode 100644 phpunit/tests/engine/include/export/FileWriterTest.php diff --git a/engine/include/export/ConllAndJsonFactory.php b/engine/include/export/ConllAndJsonFactory.php index 68c9a4cf..fbd0778d 100644 --- a/engine/include/export/ConllAndJsonFactory.php +++ b/engine/include/export/ConllAndJsonFactory.php @@ -1,10 +1,9 @@ makeConllAndJsonExportData($ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id); + $fw = new FileWriter(); + $fw->writeTextToFile($file_path_without_ext . ".conll",$conll); + $fw->writeJSONToFile($file_path_without_ext . ".json",$json_builder); - $handle = fopen($file_path_without_ext . ".json", "w"); - fwrite($handle, json_encode($json_builder, JSON_PRETTY_PRINT + JSON_UNESCAPED_UNICODE)); - fclose($handle); - } + } // exportToConllAndJson() -} \ No newline at end of file +} // ConllAndJsonFactory class diff --git a/engine/include/export/FileWriter.php b/engine/include/export/FileWriter.php new file mode 100644 index 00000000..44c6cda0 --- /dev/null +++ b/engine/include/export/FileWriter.php @@ -0,0 +1,27 @@ +writeTextToFile($fileName,$textContent); + + } // writeJSONToFile() + +} // FileWriter class diff --git a/phpunit/tests/engine/include/export/FileWriterTest.php b/phpunit/tests/engine/include/export/FileWriterTest.php new file mode 100644 index 00000000..30a4748b --- /dev/null +++ b/phpunit/tests/engine/include/export/FileWriterTest.php @@ -0,0 +1,44 @@ +virtualDir = org\bovigo\vfs\vfsStream::setup('root',null,[]); + + } // setUp() + + public function test_writeTextToFile() { + + $fileName = $this->virtualDir->url()."/test.txt"; + $text = "jnduie773nd n"; + $fw = new FileWriter(); + $fw->writeTextToFile($fileName,$text); + $result = file_get_contents($fileName); + $this->assertEquals($text,$result); + + } // test_writeTextToFile() + + public function test_writeJSONToFile() { + + $fileName = $this->virtualDir->url()."/test.txt"; + $jsonArray = array('a' => 1); + $fw = new FileWriter(); + $fw->writeJSONToFile($fileName,$jsonArray); + $result = file_get_contents($fileName); + $expected = +'{ + "a": 1 +}'; + $this->assertEquals($expected,$result); + + } // test_writeJSONToFile + +} // FileWriterTest class + +?> From 0ad37de351c6ad31fb36ac0e190e5ed04372b323 Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Sat, 23 Sep 2023 15:14:47 +0200 Subject: [PATCH 03/42] fix static call of nonstatic method - trouble in testing --- engine/include/export/CorpusExporter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine/include/export/CorpusExporter.php b/engine/include/export/CorpusExporter.php index 4bcd64a4..d465e18e 100644 --- a/engine/include/export/CorpusExporter.php +++ b/engine/include/export/CorpusExporter.php @@ -568,7 +568,7 @@ protected function export_document($report_id, $extractors, $disamb_only, &$extr $file_path_without_ext = $output_folder . "/" . $ccl->getFileName(); /* Wygeneruj CONLL i JSON */ - ConllAndJsonFactory::exportToConllAndJson($file_path_without_ext, $ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id); + (new ConllAndJsonFactory())->exportToConllAndJson($file_path_without_ext, $ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id); /* Wygeneruj xml i rel.xml */ CclFactory::setAnnotationsAndRelations($ccl, $annotations, $relations); From 439281b1894a9a2f5f227ede29e195fb1ee5872d Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Tue, 26 Sep 2023 17:53:04 +0200 Subject: [PATCH 04/42] unit tests for conll i json export --- engine/include/export/ConllAndJsonFactory.php | 3 +- .../export/ConllAndJsonFactoryTest.php | 227 ++++++++++++++++++ 2 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php diff --git a/engine/include/export/ConllAndJsonFactory.php b/engine/include/export/ConllAndJsonFactory.php index fbd0778d..dd516bec 100644 --- a/engine/include/export/ConllAndJsonFactory.php +++ b/engine/include/export/ConllAndJsonFactory.php @@ -2,7 +2,7 @@ class ConllAndJsonFactory { - private function makeConllAndJsonExportData($ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id) { + protected function makeConllAndJsonExportData($ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id) { /** * Create a cache for 'token from' to boost processing @@ -154,7 +154,6 @@ private function makeConllAndJsonExportData($ccl, $tokens, $relations, $annotati public function exportToConllAndJson($file_path_without_ext, $ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id) { - list($conll,$json_builder) = $this->makeConllAndJsonExportData($ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id); $fw = new FileWriter(); $fw->writeTextToFile($file_path_without_ext . ".conll",$conll); diff --git a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php new file mode 100644 index 00000000..3e333577 --- /dev/null +++ b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php @@ -0,0 +1,227 @@ +virtualDir = org\bovigo\vfs\vfsStream::setup('root',null,[]); + $this->generateFullTestData(); + + } // setUp() + + private function generateReportTestData() { + + // $report = DbReport::getReportById($report_id); + // "SELECT * FROM reports WHERE id = $report_id" + // id,corpora,date,title,source,author,content,type, + // status,user_id,subcorpus_id,tokenization,format_id, + // lang,filename,parent_report_id,deleted + $report = array( "id"=>$this->report_id, "corpora"=>12, + "date"=>'1970-01-01', "title"=>'tytuł', "source"=>'źródło', + "author"=>'Autor', + "content"=>'To jest duże okno. Bardzo duże.', "type"=>1, + "status"=>2, "user_id"=>1, "subcorpus_id"=>null, + "tokenization"=>null, "format_id"=>2, "lang"=>'pol', + "filename"=>'plik.txt', "parent_report_id"=>null, + "deleted"=>0 + ); + return $report; + + } // generateReportTestData() + + private function generateTokensTestData() { + + $report_id = $this->report_id; + // $tokens = DbToken::getTokenByReportId($report_id, null, true); + // "SELECT * FROM tokens LEFT JOIN orths USING (orth_id) WHERE report_id = ? ORDER BY `from`" + // token_id, report_id, from, to, eos, orth_id, orth + $tokens = array( + ["token_id"=>1, "report_id"=>$report_id, "from"=>0, "to"=>1, "eos"=>0, "orth_id"=>1, "orth"=>'To'], + ["token_id"=>2, "report_id"=>$report_id, "from"=>2, "to"=>5, "eos"=>0, "orth_id"=>2, "orth"=>'jest'], + ["token_id"=>3, "report_id"=>$report_id, "from"=>6, "to"=>9, "eos"=>0, "orth_id"=>3, "orth"=>'duże'], + ["token_id"=>4, "report_id"=>$report_id, "from"=>10, "to"=>13, "eos"=>0, "orth_id"=>4, "orth"=>'okno'], + ["token_id"=>5, "report_id"=>$report_id, "from"=>14, "to"=>14, "eos"=>1, "orth_id"=>5, "orth"=>'.'], + ["token_id"=>6, "report_id"=>$report_id, "from"=>15, "to"=>20, "eos"=>0, "orth_id"=>6, "orth"=>'Bardzo'], + ["token_id"=>7, "report_id"=>$report_id, "from"=>21, "to"=>24, "eos"=>0, "orth_id"=>3, "orth"=>'duże'], + ["token_id"=>8, "report_id"=>$report_id, "from"=>25, "to"=>25, "eos"=>1, "orth_id"=>5, "orth"=>'.'] + ); + + return $tokens; + + } // generateTokensTestData() + + private function generateCclTestData() { + + // $report parameter + $report = $this->generateReportTestData(); + // $tokens parameter + $tokens = $this->generateTokensTestData(); + // $tags_by_tokens parameter + $tags_by_tokens = array(); + + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + return $ccl; + + } // generateCclTestData() + + private function generateFullTestData() { + + $this->report_id = 1234; + + // $file_path_without_ext parameter + $this->file_path_without_ext = $this->virtualDir->url()."/test"; + + // $ccl parameter + $this->ccl = $this->generateCclTestData(); + + + } // generateFullTestData() + +// tests + + public function testEmptyReportMakesEmptyDataToWrite() { + + // args for call + $report = array(); + $tokens = array(); + $tags_by_tokens = array(); + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + $tokens = array(); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + + // invoke protected method + $protectedMethod = new ReflectionMethod('ConllAndJsonFactory','makeConllAndJsonExportData'); + $protectedMethod->setAccessible(True); + list($conll,$json_builder) = $protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + $expectedConll = "ORDER_ID\tTOKEN_ID\tORTH\tCTAG\tFROM\tTO\tANN_TAGS\tANN_IDS\tREL_IDS\tREL_TARGET_ANN_IDS\n"; + $this->assertEquals($expectedConll,$conll); + + $expectedJson = array( + "chunks" => array( + array() + ), + "relations" => array(), + "annotations" => array() + ); + $this->assertEquals($expectedJson,$json_builder); + + } // testEmptyReportMakesEmptyDataToWrite() + + public function testAllDataPlacesToDataConllAndJsonStructures() { + + // args for call + $tokens = array(); + + // DbToken::getTokenByReportId($report_id, null, true); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + + // invoke protected method + $protectedMethod = new ReflectionMethod('ConllAndJsonFactory','makeConllAndJsonExportData'); + $protectedMethod->setAccessible(True); + list($conll,$json_builder) = $protectedMethod->invoke(new ConllAndJsonFactory(),$this->ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + $expectedConll = "ORDER_ID\tTOKEN_ID\tORTH\tCTAG\tFROM\tTO\tANN_TAGS\tANN_IDS\tREL_IDS\tREL_TARGET_ANN_IDS\n"; + $expectedConll .= "0\t0\tTo\t\t0\t1\tO\t_\t_\t_\n". + "1\t1\tjest\t\t2\t5\tO\t_\t_\t_\n". + "2\t2\tduże\t\t6\t9\tO\t_\t_\t_\n". + "3\t3\tokno\t\t10\t13\tO\t_\t_\t_\n". + "4\t4\t.\t\t14\t14\tO\t_\t_\t_\n". + "\n". + "5\t0\tBardzo\t\t15\t20\tO\t_\t_\t_\n". + "6\t1\tduże\t\t21\t24\tO\t_\t_\t_\n". + "7\t2\t.\t\t25\t25\tO\t_\t_\t_\n". + "\n"; + $this->assertEquals($expectedConll,$conll); + + $expectedJson = array( + "chunks" => array( + array( + array( + array('order_id' => 0,'token_id' => 0,'orth' => 'To','ctag' => null,'from' => 0,'to' => 1,'annotations' => array(),'relations' => array()), + array('order_id' => 1,'token_id' => 1,'orth' => 'jest','ctag' => null,'from' => 2,'to' => 5,'annotations' => array(),'relations' => array()), + array('order_id' => 2,'token_id' => 2,'orth' => 'duże','ctag' => null,'from' => 6,'to' => 9,'annotations' => array(),'relations' => array()), + array('order_id' => 3,'token_id' => 3,'orth' => 'okno','ctag' => null,'from' => 10,'to' => 13,'annotations' => array(),'relations' => array()), + array('order_id' => 4,'token_id' => 4,'orth' => '.','ctag' => null,'from' => 14,'to' => 14,'annotations' => array(),'relations' => array()) + ), + array( + array('order_id' => 5,'token_id' => 0,'orth' => 'Bardzo','ctag' => null,'from' => 15,'to' => 20,'annotations' => array(),'relations' => array()), + array('order_id' => 6,'token_id' => 1,'orth' => 'duże','ctag' => null,'from' => 21,'to' => 24,'annotations' => array(),'relations' => array()), + array('order_id' => 7,'token_id' => 2,'orth' => '.','ctag' => null,'from' => 25,'to' => 25,'annotations' => array(),'relations' => array()) + ) + ) + ), + "relations" => array(), + "annotations" => array() + ); + $this->assertEquals($expectedJson,$json_builder); + + } // testAllDataPlacesToDataConllAndJsonStructures() + +/* class ConllAndJsonFactory has only 1 function + function exportToConllAndJson($file_path_without_ext, $ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id) +*/ + + public function testDeliveredDataAreWrittenToFiles() { + + $conll = "jakiś text"; + $json_builder = array("a"=>1); + + // this values doesn't matter + $report = array(); + $tokens = array(); + $tags_by_tokens = array(); + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + $tokens = array(); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + + // self-mocking another method + $mockedMethodNamesList = array('makeConllAndJsonExportData'); + $mockedResult = array($conll,$json_builder); + $mock = $this->getMockBuilder(ConllAndJsonFactory::class) + -> setMethods($mockedMethodNamesList) // ustawia je na null + -> getMock(); + $mock // i mockuje na zwracanie określonego rezultatu + -> method('makeConllAndJsonExportData') + -> will($this->returnValue($mockedResult)); + + // metoda exportToConllAndJson() powinna zachować obsługę oryginalną + $mock->exportToConllAndJson($this->file_path_without_ext,$this->ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + $expectedConll = $conll; + $conllFileName = $this->file_path_without_ext.".conll"; + $resultConll = file_get_contents($conllFileName); + $this->assertEquals($expectedConll,$resultConll); + $expectedJson = +'{ + "a": 1 +}'; + $jsonFileName = $this->file_path_without_ext.".json"; + $resultJson = file_get_contents($jsonFileName); + $this->assertEquals($expectedJson,$resultJson); + + } // testDeliveredDataAreWrittenToFiles + +} // ConllAndJsonFactoryTest class + +?> From e49806ff8f82fd1a4c95625dd486daf1a1e8b40c Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Wed, 4 Oct 2023 12:57:06 +0200 Subject: [PATCH 05/42] unit tests with tokens, annotations, relations, lemmas and attributes --- .../export/ConllAndJsonFactoryTest.php | 310 ++++++++++++++---- 1 file changed, 254 insertions(+), 56 deletions(-) diff --git a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php index 3e333577..d312a7aa 100644 --- a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php +++ b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php @@ -8,18 +8,29 @@ class ConllAndJsonFactoryTest extends PHPUnit_Framework_TestCase { private $virtualDir = null; // data for test private $report_id = null; + private $ccl = null; + private $tokens = null; + private $tokens_ids = null; // data for class method private $file_path_without_ext = null; + // reflection to protected method in tested class + private $protectedMethod = null; protected function setUp() { $this->virtualDir = org\bovigo\vfs\vfsStream::setup('root',null,[]); $this->generateFullTestData(); + // protected method to invoke in all tests + $this->protectedMethod = new ReflectionMethod('ConllAndJsonFactory','makeConllAndJsonExportData'); + $this->protectedMethod->setAccessible(True); } // setUp() - private function generateReportTestData() { + private function generateReportTestData($report_id=1234) { + if(!$this->report_id) { + $this->report_id = $report_id; + } // $report = DbReport::getReportById($report_id); // "SELECT * FROM reports WHERE id = $report_id" // id,corpora,date,title,source,author,content,type, @@ -38,8 +49,12 @@ private function generateReportTestData() { } // generateReportTestData() - private function generateTokensTestData() { + private function generateTokensTestData($report_id=1234) { + if(!$this->report_id) { + $this->report_id = $report_id; + } + $report_id = $this->report_id; // $tokens = DbToken::getTokenByReportId($report_id, null, true); // "SELECT * FROM tokens LEFT JOIN orths USING (orth_id) WHERE report_id = ? ORDER BY `from`" @@ -59,12 +74,127 @@ private function generateTokensTestData() { } // generateTokensTestData() - private function generateCclTestData() { + private function getExpectedConllHeader() { + // static header for all CONLL files + return "ORDER_ID\tTOKEN_ID\tORTH\tCTAG\tFROM\tTO\tANN_TAGS\tANN_IDS\tREL_IDS\tREL_TARGET_ANN_IDS\n"; + } // + + private function getExpectedConll() { + // expected Conll pattern for our data and not annotations + $expectedConll = $this->getExpectedConllHeader(); + $expectedConll .= "0\t0\tTo\t\t0\t1\tO\t_\t_\t_\n". + "1\t1\tjest\t\t2\t5\tO\t_\t_\t_\n". + "2\t2\tduże\t\t6\t9\tO\t_\t_\t_\n". + //"2\t2\tduże\t\t6\t9\tB-\t1\t_\t_\n". + //"2\t2\tduże\t\t6\t9\tB-nam_adj\t1\t_\t_\n". + "3\t3\tokno\t\t10\t13\tO\t_\t_\t_\n". + "4\t4\t.\t\t14\t14\tO\t_\t_\t_\n". + "\n". + "5\t0\tBardzo\t\t15\t20\tO\t_\t_\t_\n". + "6\t1\tduże\t\t21\t24\tO\t_\t_\t_\n". + "7\t2\t.\t\t25\t25\tO\t_\t_\t_\n". + "\n"; + return $expectedConll; + } // getExpectedConll() + + private function getExpectedEmptyJson() { + + return array( + "chunks" => array( + array() + ), + "relations" => array(), + "annotations" => array() + ); + + } // getExpectedEmptyJson() + + private function getExpectedChunks() { + // returns "chunks" section in expected table for JSON + // for our data without annotations + $chunks = array( + array( + array( + array('order_id' => 0,'token_id' => 0,'orth' => 'To','ctag' => null,'from' => 0,'to' => 1,'annotations' => array(),'relations' => array()), + array('order_id' => 1,'token_id' => 1,'orth' => 'jest','ctag' => null,'from' => 2,'to' => 5,'annotations' => array(),'relations' => array()), + array('order_id' => 2,'token_id' => 2,'orth' => 'duże','ctag' => null,'from' => 6,'to' => 9,'annotations' => array(),'relations' => array()), + array('order_id' => 3,'token_id' => 3,'orth' => 'okno','ctag' => null,'from' => 10,'to' => 13,'annotations' => array(),'relations' => array()), + array('order_id' => 4,'token_id' => 4,'orth' => '.','ctag' => null,'from' => 14,'to' => 14,'annotations' => array(),'relations' => array()) + ), + array( + array('order_id' => 5,'token_id' => 0,'orth' => 'Bardzo','ctag' => null,'from' => 15,'to' => 20,'annotations' => array(),'relations' => array()), + array('order_id' => 6,'token_id' => 1,'orth' => 'duże','ctag' => null,'from' => 21,'to' => 24,'annotations' => array(),'relations' => array()), + array('order_id' => 7,'token_id' => 2,'orth' => '.','ctag' => null,'from' => 25,'to' => 25,'annotations' => array(),'relations' => array()) + ) + ) + ); + return $chunks; + } // getExpectedChunks + + private function generateAnnotation_By_IdTestData( + $annotationExtractorFields=true, + $annotationsWithLemmaField=true, + $annotationsWithTypeAttributte=true + ) { + + $annotation_id = 1; + // index must match 'id' field + $annotations_by_id = array( + $annotation_id => array( 'id'=>$annotation_id, + // fields, that always exists + // only 'to' changes business logic + 'report_id'=>1,'type_id'=>360,'from'=>6,'to'=>9,'text'=>'duże','user_id'=>1,'creation_time'=>'2022-10-03 08:07:37','stage'=>'final','source'=>'user','annotation_subset_id'=>'52', + // this are always, but from different tables + 'type'=>'nam_adj','group_id'=>1, + ) + + ); + if($annotationExtractorFields) { + // fields, that exists for extractor 'annotation' only + $annotations_by_id[$annotation_id]['login']='admin'; + $annotations_by_id[$annotation_id]['screename']='Inforex Admin'; + if($annotationsWithLemmaField){ + $annotations_by_id[$annotation_id]['lemma']='lemat dodany do duże'; + } else { + $annotations_by_id[$annotation_id]['lemma']=null; + } + } else { + // fields, that exists for extractors 'annotation_id' + // and 'annotation_subset_id' only + $annotations_by_id[$annotation_id]['group']=1; + $annotations_by_id[$annotation_id]['name']='nam_adj'; + $annotations_by_id[$annotation_id]['description']='Przymiotnik utworzony od nazwy własnej'; + $annotations_by_id[$annotation_id]['css']='background: lightgreen;'; + $annotations_by_id[$annotation_id]['cross_sentence']='0'; + $annotations_by_id[$annotation_id]['shortlist']='0'; + // this section only for simple extractor with attribute in DB + if($annotationsWithTypeAttributte) { + $annotations_by_id[$annotation_id]['annotation_id']=$annotation_id; + $annotations_by_id[$annotation_id]['annotation_attribute_id']=1; + $annotations_by_id[$annotation_id]['value']='valueAtrTypu'; + $annotations_by_id[$annotation_id]['prop']='valueAtrTypu'; + } + } + + return $annotations_by_id; + + } // generateAnnotation_By_IdTestData() + + private function generateAnnotationsFromAnnotations_By_Id(array $annotations_by_id) { + return array_values($annotations_by_id); + + } // generateAnnotationsFromAnnotations_By_Id + + private function generateCclTestData($report=null,$tokens=null) { // $report parameter - $report = $this->generateReportTestData(); + if($report==null){ + $report = $this->generateReportTestData(); + } // $tokens parameter - $tokens = $this->generateTokensTestData(); + if($tokens==null){ + $tokens = $this->generateTokensTestData(); + } // $tags_by_tokens parameter $tags_by_tokens = array(); @@ -80,8 +210,16 @@ private function generateFullTestData() { // $file_path_without_ext parameter $this->file_path_without_ext = $this->virtualDir->url()."/test"; + // $report parameter, local for CCL only + $report = $this->generateReportTestData(); + + // $tokens parameter + $this->tokens = $this->generateTokensTestData($this->report_id); + // tak jest ustawiane zawsze w CorpusExporter + $this->tokens_ids = array_column($this->tokens, 'token_id'); + // $ccl parameter - $this->ccl = $this->generateCclTestData(); + $this->ccl = $this->generateCclTestData($report,$this->tokens); } // generateFullTestData() @@ -90,6 +228,7 @@ private function generateFullTestData() { public function testEmptyReportMakesEmptyDataToWrite() { + // all empty input data generates minimal export output // args for call $report = array(); $tokens = array(); @@ -101,80 +240,139 @@ public function testEmptyReportMakesEmptyDataToWrite() { $tokens_ids = array(); $annotations_by_id = array(); - // invoke protected method - $protectedMethod = new ReflectionMethod('ConllAndJsonFactory','makeConllAndJsonExportData'); - $protectedMethod->setAccessible(True); - list($conll,$json_builder) = $protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); // check results - $expectedConll = "ORDER_ID\tTOKEN_ID\tORTH\tCTAG\tFROM\tTO\tANN_TAGS\tANN_IDS\tREL_IDS\tREL_TARGET_ANN_IDS\n"; - $this->assertEquals($expectedConll,$conll); - - $expectedJson = array( - "chunks" => array( - array() - ), - "relations" => array(), - "annotations" => array() - ); - $this->assertEquals($expectedJson,$json_builder); + $this->assertEquals($this->getExpectedConllHeader(),$conll); + $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); } // testEmptyReportMakesEmptyDataToWrite() - public function testAllDataPlacesToDataConllAndJsonStructures() { + public function testReportArrayFieldsMakesEmptyDataToWrite() { + + // any content of $report array doesn't generate nonempty output // args for call $tokens = array(); - - // DbToken::getTokenByReportId($report_id, null, true); + $tags_by_tokens = array(); $relations = array(); $annotations = array(); $tokens_ids = array(); $annotations_by_id = array(); - // invoke protected method - $protectedMethod = new ReflectionMethod('ConllAndJsonFactory','makeConllAndJsonExportData'); - $protectedMethod->setAccessible(True); - list($conll,$json_builder) = $protectedMethod->invoke(new ConllAndJsonFactory(),$this->ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + // test only 'id' field + $report = array('id'=>1234); + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + $this->assertEquals($this->getExpectedConllHeader(),$conll); + $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); + + // test only 'content' field + $report = array('content'=>'jakiś tekst'); + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + $this->assertEquals($this->getExpectedConllHeader(),$conll); + $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); + + // test any other field in $report + $report = array('id'=>1234, + 'content'=>'jakiś tekst', + 'other_report_field'=>'other field text'); + $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + $this->assertEquals($this->getExpectedConllHeader(),$conll); + $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); + + } // testReportArrayFieldsMakesEmptyDataToWrite() + + public function testAllDataPlacesToDataConllAndJsonStructures() { + + // args for call + $relations = array(); + $annotations_by_id = $this->generateAnnotation_By_IdTestData(False,True); + $annotations = $this->generateAnnotationsFromAnnotations_By_Id($annotations_by_id); + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$this->ccl,$this->tokens,$relations,$annotations,$this->tokens_ids,$annotations_by_id); // check results - $expectedConll = "ORDER_ID\tTOKEN_ID\tORTH\tCTAG\tFROM\tTO\tANN_TAGS\tANN_IDS\tREL_IDS\tREL_TARGET_ANN_IDS\n"; - $expectedConll .= "0\t0\tTo\t\t0\t1\tO\t_\t_\t_\n". - "1\t1\tjest\t\t2\t5\tO\t_\t_\t_\n". - "2\t2\tduże\t\t6\t9\tO\t_\t_\t_\n". - "3\t3\tokno\t\t10\t13\tO\t_\t_\t_\n". - "4\t4\t.\t\t14\t14\tO\t_\t_\t_\n". - "\n". - "5\t0\tBardzo\t\t15\t20\tO\t_\t_\t_\n". - "6\t1\tduże\t\t21\t24\tO\t_\t_\t_\n". - "7\t2\t.\t\t25\t25\tO\t_\t_\t_\n". - "\n"; + $expectedConll = $this->getExpectedConll(); + $expectedConll = str_replace( + "2\t2\tduże\t\t6\t9\tO\t_\t_\t_\n", + "2\t2\tduże\t\t6\t9\tB-nam_adj\t1\t_\t_\n", + $expectedConll); $this->assertEquals($expectedConll,$conll); + $expectedChunks = $this->getExpectedChunks(); + // annotation_id to 'annotations' list in 3-rd chunk + $expectedChunks[0][0][2]['annotations'][] = 1; $expectedJson = array( - "chunks" => array( - array( - array( - array('order_id' => 0,'token_id' => 0,'orth' => 'To','ctag' => null,'from' => 0,'to' => 1,'annotations' => array(),'relations' => array()), - array('order_id' => 1,'token_id' => 1,'orth' => 'jest','ctag' => null,'from' => 2,'to' => 5,'annotations' => array(),'relations' => array()), - array('order_id' => 2,'token_id' => 2,'orth' => 'duże','ctag' => null,'from' => 6,'to' => 9,'annotations' => array(),'relations' => array()), - array('order_id' => 3,'token_id' => 3,'orth' => 'okno','ctag' => null,'from' => 10,'to' => 13,'annotations' => array(),'relations' => array()), - array('order_id' => 4,'token_id' => 4,'orth' => '.','ctag' => null,'from' => 14,'to' => 14,'annotations' => array(),'relations' => array()) - ), - array( - array('order_id' => 5,'token_id' => 0,'orth' => 'Bardzo','ctag' => null,'from' => 15,'to' => 20,'annotations' => array(),'relations' => array()), - array('order_id' => 6,'token_id' => 1,'orth' => 'duże','ctag' => null,'from' => 21,'to' => 24,'annotations' => array(),'relations' => array()), - array('order_id' => 7,'token_id' => 2,'orth' => '.','ctag' => null,'from' => 25,'to' => 25,'annotations' => array(),'relations' => array()) - ) - ) - ), + "chunks" => $expectedChunks, "relations" => array(), - "annotations" => array() + "annotations" => array( + array('id'=>1,'report_id'=>1,'type_id'=>360,'from'=>6,'to'=>9, + 'text' => 'duże','user_id' => 1, + 'creation_time' => '2022-10-03 08:07:37','stage' => 'final', + 'source' => 'user','annotation_subset_id' => '52', + 'type'=>'nam_adj','group_id'=>1, +/* + 'login' => 'admin','screename' => 'Inforex Admin', + 'lemma' => 'lemat dodany do duże', +*/ + 'group' => 1,'description' => 'Przymiotnik utworzony od nazwy własnej','css' => 'background: lightgreen;','cross_sentence' => '0','shortlist' => '0', + 'name'=>'nam_adj', + // tylko jeśli istnieje atrybut + 'annotation_id' => 1,'annotation_attribute_id' => 1,'value' => 'valueAtrTypu','prop' => 'valueAtrTypu' + ), + ) ); $this->assertEquals($expectedJson,$json_builder); } // testAllDataPlacesToDataConllAndJsonStructures() + public function testDocumentWithoutTokenizationPlaceDataToJsonAndConllArray() { + // CCL is self-tokenized by constructor + // args for call + $tokens = array(); + // tak jest ustawiane zawsze w CorpusExporter + $tokens_ids = array_column($tokens, 'token_id'); + $ccl = $this->generateCclTestData( + $this->generateReportTestData(),$tokens); + $relations = array(); + $annotations = array(); + $annotations_by_id = array(); + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + $this->assertEquals($this->getExpectedConll(),$conll); + + $expectedJson = $this->getExpectedEmptyJson(); + $expectedJson["chunks"] = $this->getExpectedChunks(); + $this->assertEquals($expectedJson,$json_builder); + + // with tokens without annotations there are all the same as above + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$this->ccl,$this->tokens,$relations,$annotations,$this->tokens_ids,$annotations_by_id); + + $this->assertEquals($this->getExpectedConll(),$conll); + $this->assertEquals($expectedJson,$json_builder); + + } // testDocumentWithoutTokenizationPlaceDataToJsonAndConllArray() + + public function testGeneralAnnotationFieldsPlacedToJsonAndConllArray() { + + // are all fields which always should be present in annotation + // record placed correctly to JSON and CONLL structures + + $this->assertTrue(True); + + } // testGeneralAnnotationFieldsPlacedToJsonAndConllArray + /* class ConllAndJsonFactory has only 1 function function exportToConllAndJson($file_path_without_ext, $ccl, $tokens, $relations, $annotations, $tokens_ids, $annotations_by_id) */ From 15a5e7149fdaf957e0b4d760b71106044aeb7427 Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Sat, 7 Oct 2023 18:54:46 +0200 Subject: [PATCH 06/42] mocked all Ccl input clases for tests --- .../export/ConllAndJsonFactoryTest.php | 111 +++++++----------- 1 file changed, 43 insertions(+), 68 deletions(-) diff --git a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php index d312a7aa..61cc6240 100644 --- a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php +++ b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php @@ -185,24 +185,6 @@ private function generateAnnotationsFromAnnotations_By_Id(array $annotations_by_ } // generateAnnotationsFromAnnotations_By_Id - private function generateCclTestData($report=null,$tokens=null) { - - // $report parameter - if($report==null){ - $report = $this->generateReportTestData(); - } - // $tokens parameter - if($tokens==null){ - $tokens = $this->generateTokensTestData(); - } - // $tags_by_tokens parameter - $tags_by_tokens = array(); - - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); - return $ccl; - - } // generateCclTestData() - private function generateFullTestData() { $this->report_id = 1234; @@ -210,35 +192,68 @@ private function generateFullTestData() { // $file_path_without_ext parameter $this->file_path_without_ext = $this->virtualDir->url()."/test"; - // $report parameter, local for CCL only - $report = $this->generateReportTestData(); - // $tokens parameter $this->tokens = $this->generateTokensTestData($this->report_id); // tak jest ustawiane zawsze w CorpusExporter $this->tokens_ids = array_column($this->tokens, 'token_id'); // $ccl parameter - $this->ccl = $this->generateCclTestData($report,$this->tokens); - + $this->ccl = $this->generateFullCclData(); } // generateFullTestData() + private function makeMockToken($id,$orth,$lexemes,$from,$to) { + + $mockToken = $this->getMockBuilder(CclToken::class)->getMock(); + $mockToken->id = $id; + $mockToken->orth = $orth; + $mockToken->lexemes = $lexemes; + $mockToken->from = $from; + $mockToken->to = $to; + return $mockToken; + + } // makeMockToken + + private function generateFullCclData() { + + // mocked $ccl argument for empty report + $mockToken11 = $this->makeMockToken(0,"To",array(),0,1); + $mockToken12 = $this->makeMockToken(1,"jest",array(),2,5); + $mockToken13 = $this->makeMockToken(2,"duże",array(),6,9); + $mockToken14 = $this->makeMockToken(3,"okno",array(),10,13); + $mockToken15 = $this->makeMockToken(4,".",array(),14,14); + $mockSentence1 = $this->getMockBuilder(CclSentence::class)->getMock(); + $mockSentence1->tokens = array($mockToken11,$mockToken12,$mockToken13,$mockToken14,$mockToken15); + $mockToken21 = $this->makeMockToken(5,"Bardzo",array(),15,20); + $mockToken22 = $this->makeMockToken(6,"duże",array(),21,24); + $mockToken23 = $this->makeMockToken(7,".",array(),25,25); + $mockSentence2 = $this->getMockBuilder(CclSentence::class)->getMock(); + $mockSentence2->tokens = array($mockToken21,$mockToken22,$mockToken23); + $mockChunk = $this->getMockBuilder(CclChunk::class)->getMock(); + $mockChunk->sentences = array($mockSentence1,$mockSentence2); + $mockCclDocument = $this->getMockBuilder(CclDocument::class)->getMock(); + $mockCclDocument->chunks = array($mockChunk); + return $mockCclDocument; + + } // generateFullCclData() + // tests public function testEmptyReportMakesEmptyDataToWrite() { // all empty input data generates minimal export output // args for call - $report = array(); - $tokens = array(); - $tags_by_tokens = array(); - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); $tokens = array(); $relations = array(); $annotations = array(); $tokens_ids = array(); $annotations_by_id = array(); + // mocked $ccl argument for empty report + $mockChunk = $this->getMockBuilder(CclChunk::class)->getMock(); + $mockChunk->sentences = array(); + $mockCclDocument = $this->getMockBuilder(CclDocument::class)->getMock(); + $mockCclDocument->chunks = array($mockChunk); + $ccl = $mockCclDocument; // invoke tested method list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); @@ -249,44 +264,6 @@ public function testEmptyReportMakesEmptyDataToWrite() { } // testEmptyReportMakesEmptyDataToWrite() - public function testReportArrayFieldsMakesEmptyDataToWrite() { - - // any content of $report array doesn't generate nonempty output - - // args for call - $tokens = array(); - $tags_by_tokens = array(); - $relations = array(); - $annotations = array(); - $tokens_ids = array(); - $annotations_by_id = array(); - - // test only 'id' field - $report = array('id'=>1234); - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); - list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); - $this->assertEquals($this->getExpectedConllHeader(),$conll); - $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); - - // test only 'content' field - $report = array('content'=>'jakiś tekst'); - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); - - list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); - $this->assertEquals($this->getExpectedConllHeader(),$conll); - $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); - - // test any other field in $report - $report = array('id'=>1234, - 'content'=>'jakiś tekst', - 'other_report_field'=>'other field text'); - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); - list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); - $this->assertEquals($this->getExpectedConllHeader(),$conll); - $this->assertEquals($this->getExpectedEmptyJson(),$json_builder); - - } // testReportArrayFieldsMakesEmptyDataToWrite() - public function testAllDataPlacesToDataConllAndJsonStructures() { // args for call @@ -338,14 +315,12 @@ public function testDocumentWithoutTokenizationPlaceDataToJsonAndConllArray() { $tokens = array(); // tak jest ustawiane zawsze w CorpusExporter $tokens_ids = array_column($tokens, 'token_id'); - $ccl = $this->generateCclTestData( - $this->generateReportTestData(),$tokens); $relations = array(); $annotations = array(); $annotations_by_id = array(); // invoke tested method - list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$this->ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); // check results $this->assertEquals($this->getExpectedConll(),$conll); From 0f56fd7e69f217ef5b653ae1a0337292c264a825 Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Mon, 9 Oct 2023 19:01:59 +0200 Subject: [PATCH 07/42] export unit tests for elementary Ccl elements --- .../export/ConllAndJsonFactoryTest.php | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php index 61cc6240..2af10178 100644 --- a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php +++ b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php @@ -264,6 +264,116 @@ public function testEmptyReportMakesEmptyDataToWrite() { } // testEmptyReportMakesEmptyDataToWrite() + public function testOneEmptySentenceCclReportMakesLinesWithNoData() { + + // only 1 empty sentence in 1 chunk in Ccl + // args for call + $tokens = array(); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + // mocked $ccl argument for empty report + $mockSentence = $this->getMockBuilder(CclSentence::class)->getMock(); + $mockSentence->tokens = array(); + $mockChunk = $this->getMockBuilder(CclChunk::class)->getMock(); + $mockChunk->sentences = array($mockSentence); + $mockCclDocument = $this->getMockBuilder(CclDocument::class)->getMock(); + $mockCclDocument->chunks = array($mockChunk); + $ccl = $mockCclDocument; + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + // empty sentence generates one empty line in export CONLL + $expectedConll = $this->getExpectedConllHeader()."\n"; + $this->assertEquals($expectedConll,$conll); + $expectedJson = $this->getExpectedEmptyJson(); + // empty sentence generates one empty array in export JSON + $expectedJson['chunks'][0][] = array(); + $this->assertEquals($expectedJson,$json_builder); + + } // testOneEmptySentenceCclReportMakesLinesWithNoData() + + public function testOneEmptyTokenCclReportMakesTokenWithEmptyData() { + + // only 1 sentence with 1 empty token in 1 chunk in Ccl + // args for call + $tokens = array(); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + // mocked $ccl argument for empty report + $mockToken = $this->getMockBuilder(CclToken::class)->getMock(); + $mockSentence = $this->getMockBuilder(CclSentence::class)->getMock(); + $mockSentence->tokens = array($mockToken); + $mockChunk = $this->getMockBuilder(CclChunk::class)->getMock(); + $mockChunk->sentences = array($mockSentence); + $mockCclDocument = $this->getMockBuilder(CclDocument::class)->getMock(); + $mockCclDocument->chunks = array($mockChunk); + $ccl = $mockCclDocument; + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + // empty token generates row with empty data in export CONLL + $expectedConll = $this->getExpectedConllHeader() + ."\t0\t\t\t\t\tO\t_\t_\t_\n" + ."\n"; + $this->assertEquals($expectedConll,$conll); + // empty token generates one token array in sentence with export + // dummy data in JSON & also creates annotations and relations arrays + $emptyTokenData = array('order_id' => null,'token_id' => 0,'orth' => null,'ctag' => null,'from' => null,'to' => null,'annotations' => Array (),'relations' => Array ()); + $expectedJson['chunks'][0][] = array($emptyTokenData); + // if any token exist, must be annotations & relations tables in JSON + $expectedJson["annotations"] = array(); + $expectedJson["relations"] = array(); + $this->assertEquals($expectedJson,$json_builder); + + } // testOneEmptyTokenCclReportMakesTokenWithEmptyData() + + public function testOneTokenCclMakesNonemptyDataToWrite() { + + // minimal set - 1 sentence with 1 normal token in 1 chunk in Ccl + // args for call + $tokens = array(); + $relations = array(); + $annotations = array(); + $tokens_ids = array(); + $annotations_by_id = array(); + // mocked $ccl argument for empty report + $mockToken = $this->makeMockToken(0,"To",array(),0,1); + $mockSentence = $this->getMockBuilder(CclSentence::class)->getMock(); + $mockSentence->tokens = array($mockToken); + $mockChunk = $this->getMockBuilder(CclChunk::class)->getMock(); + $mockChunk->sentences = array($mockSentence); + $mockCclDocument = $this->getMockBuilder(CclDocument::class)->getMock(); + $mockCclDocument->chunks = array($mockChunk); + $ccl = $mockCclDocument; + + // invoke tested method + list($conll,$json_builder) = $this->protectedMethod->invoke(new ConllAndJsonFactory(),$ccl,$tokens,$relations,$annotations,$tokens_ids,$annotations_by_id); + + // check results + // simple token generates row with its data in export CONLL + $expectedConll = $this->getExpectedConllHeader() + ."0\t0\tTo\t\t0\t1\tO\t_\t_\t_\n" + ."\n"; + $this->assertEquals($expectedConll,$conll); + // one token generates token array with data in sentence array + // & also creates annotations and relations arrays in export JSON + $emptyTokenData = array('order_id' => 0,'token_id' => 0,'orth' => 'To','ctag' => null,'from' => 0,'to' => 1,'annotations' => Array (),'relations' => Array ()); + $expectedJson['chunks'][0][] = array($emptyTokenData); + // if any token exist, must be annotations & relations tables in JSON + $expectedJson["annotations"] = array(); + $expectedJson["relations"] = array(); + $this->assertEquals($expectedJson,$json_builder); + + } // testOneTokenCclMakesNonemptyDataToWrite + public function testAllDataPlacesToDataConllAndJsonStructures() { // args for call From 6da0801786ea251f3e681aa8bb4e46857b0424d7 Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Thu, 12 Oct 2023 08:17:19 +0200 Subject: [PATCH 08/42] unit test for export with relation --- .../export/ConllAndJsonFactoryTest.php | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php index 2af10178..b0460fef 100644 --- a/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php +++ b/phpunit/tests/engine/include/export/ConllAndJsonFactoryTest.php @@ -372,12 +372,23 @@ public function testOneTokenCclMakesNonemptyDataToWrite() { $expectedJson["relations"] = array(); $this->assertEquals($expectedJson,$json_builder); - } // testOneTokenCclMakesNonemptyDataToWrite + } // testOneTokenCclMakesNonemptyDataToWrite() public function testAllDataPlacesToDataConllAndJsonStructures() { // args for call $relations = array(); + $relationData = array( + 'report_id' => '1', + 'id' => '1', + 'relation_type_id' => '1', + 'source_id' => '1', + 'target_id' => '2', + 'relation_set_id' => '1', + 'name' => 'test', + 'rsname' => 'test', + ); + $relations = array( $relationData ); $annotations_by_id = $this->generateAnnotation_By_IdTestData(False,True); $annotations = $this->generateAnnotationsFromAnnotations_By_Id($annotations_by_id); @@ -388,16 +399,19 @@ public function testAllDataPlacesToDataConllAndJsonStructures() { $expectedConll = $this->getExpectedConll(); $expectedConll = str_replace( "2\t2\tduże\t\t6\t9\tO\t_\t_\t_\n", - "2\t2\tduże\t\t6\t9\tB-nam_adj\t1\t_\t_\n", + "2\t2\tduże\t\t6\t9\tB-nam_adj\t1\t1\t2\n", $expectedConll); $this->assertEquals($expectedConll,$conll); $expectedChunks = $this->getExpectedChunks(); // annotation_id to 'annotations' list in 3-rd chunk $expectedChunks[0][0][2]['annotations'][] = 1; + $expectedChunks[0][0][2]['relations'][0] = 1; $expectedJson = array( "chunks" => $expectedChunks, - "relations" => array(), + "relations" => array( + $relationData + ), "annotations" => array( array('id'=>1,'report_id'=>1,'type_id'=>360,'from'=>6,'to'=>9, 'text' => 'duże','user_id' => 1, @@ -468,10 +482,6 @@ public function testDeliveredDataAreWrittenToFiles() { $json_builder = array("a"=>1); // this values doesn't matter - $report = array(); - $tokens = array(); - $tags_by_tokens = array(); - $ccl = CclFactory::createFromReportAndTokens($report, $tokens, $tags_by_tokens); $tokens = array(); $relations = array(); $annotations = array(); From 88eddcf78c681974b3a9a201b4c799122e53579d Mon Sep 17 00:00:00 2001 From: Seweryn Walentynowicz Date: Fri, 20 Oct 2023 17:51:18 +0200 Subject: [PATCH 09/42] synchro with relation export startegy change --- engine/include/database/CDbCorpusRelation.php | 36 ++++++++----------- engine/include/export/CorpusExporter.php | 16 +++++++-- engine/include/export/ExportManager.php | 2 +- engine/templates/page_corpus_export.tpl | 1 + .../export/CorpusExporter_part0_Test.php | 4 +-- public_html/js/c_widget_annotation_panel.js | 4 +-- 6 files changed, 34 insertions(+), 29 deletions(-) diff --git a/engine/include/database/CDbCorpusRelation.php b/engine/include/database/CDbCorpusRelation.php index 8f281e8c..28f99854 100644 --- a/engine/include/database/CDbCorpusRelation.php +++ b/engine/include/database/CDbCorpusRelation.php @@ -163,28 +163,20 @@ static function getRelationsByRelationSetIds($relation_set_ids){ return $db->fetch_rows($sql); } - //TODO to delete - static function getRelationsBySets($report_ids, $relation_type_ids){ - global $db; - $sql = "SELECT reports_annotations.report_id as report_id, rel.id, rel.relation_type_id, rel.source_id, rel.target_id, relation_types.name " . - "FROM " . - "(SELECT * " . - "FROM relations " . - "WHERE source_id IN " . - "(SELECT id " . - "FROM reports_annotations " . - "WHERE report_id IN('" . implode("','",$report_ids) . "')) " . - "AND relation_type_id " . - "IN (".implode(",",$relation_type_ids).")) rel " . - "LEFT JOIN relation_types " . - "ON rel.relation_type_id=relation_types.id " . - "LEFT JOIN reports_annotations " . - "ON rel.source_id=reports_annotations.id "; - return $db->fetch_rows($sql); - } - - static function getRelationsBySets2($report_ids=null, $relation_set_ids=null, $relation_type_ids=null, $stage_ids=null, $user_ids=null){ + static function getRelationsBySets($report_ids=null, $relation_set_ids=null, $relation_type_ids=null, $stage_ids=null, $user_ids=null, $relation_stages=array()){ global $db; + + // if $relation_stages not set is equal $stage_ids - stages of + // relation are identical as stages of annotations + if( is_array($relation_stages) && (count($relation_stages)==0)) { + $relation_stages = $stage_ids; + } // if not set + + if (is_array($relation_stages) && (count($relation_stages)>0)) { + $relationStages = "stage IN('".implode("','",$relation_stages)."') AND"; + } else { // if $relation_stages==null default is 'final' + $relationStages = "stage = 'final' AND"; + } $sql = "SELECT reports_annotations.report_id as report_id, " . " rel.id, " . " rel.relation_type_id, " . @@ -196,7 +188,7 @@ static function getRelationsBySets2($report_ids=null, $relation_set_ids=null, $r "FROM " . "(SELECT * " . "FROM relations " . - "WHERE stage = 'final' AND source_id IN " . + "WHERE ".$relationStages." source_id IN " . "(SELECT id " . "FROM reports_annotations " . "WHERE report_id IN('0','" . implode("','",$report_ids) . "')) " . diff --git a/engine/include/export/CorpusExporter.php b/engine/include/export/CorpusExporter.php index d465e18e..92fdf8fd 100644 --- a/engine/include/export/CorpusExporter.php +++ b/engine/include/export/CorpusExporter.php @@ -109,6 +109,7 @@ protected function parse_extractor($description){ $params['attributes_annotation_subset_ids'] = null; $params['relation_set_ids'] = null; $params['stages'] = null; + $params['relation_stages'] = array(); // internally expanded foreach ( explode(";", $parts[1]) as $part ){ $name_value = explode("#", $part); @@ -126,6 +127,17 @@ protected function parse_extractor($description){ } } + // hint for selecting annotation in stage final and relation + // in stage agreement + if( is_array($params["stages"])) { + foreach($params["stages"] as &$stage) { + if($stage=='relationagreement') { + $stage = 'final'; // for annotations + $params["relation_stages"] = array('agreement'); // for relations + } // if 'relationagreement' + } // foreach "stages" + } // is_array('stages') + $extractor["params"] = $params; $extractor["extractor"] = function($report_id, $params, &$elements){ // $params -- annotations_set_ids, $stages @@ -163,7 +175,7 @@ protected function parse_extractor($description){ } if(is_array($params['relation_set_ids']) && count($params['relation_set_ids'])>0) { // add custom relation - $relations = DbCorpusRelation::getRelationsBySets2(array($report_id), $params['relation_set_ids'], null, $params["stages"],$params["user_ids"]); + $relations = DbCorpusRelation::getRelationsBySets(array($report_id), $params['relation_set_ids'], null, $params["stages"],$params["user_ids"],$params["relation_stages"]); if ( is_array($relations) ) { $elements['relations'] = array_merge($elements['relations'], $relations); } @@ -188,7 +200,7 @@ protected function parse_extractor($description){ $extractor["params"] = explode(",", $parts[1]); $extractor["extractor"] = function($report_id, $params, &$elements){ // $params -- set of annotation_set_id - $relations = DbCorpusRelation::getRelationsBySets2(array($report_id), $params); + $relations = DbCorpusRelation::getRelationsBySets(array($report_id), $params); if ( is_array($relations) ) { $elements['relations'] = array_merge($elements['relations'], $relations); } diff --git a/engine/include/export/ExportManager.php b/engine/include/export/ExportManager.php index 80a27532..8960f190 100644 --- a/engine/include/export/ExportManager.php +++ b/engine/include/export/ExportManager.php @@ -298,7 +298,7 @@ function readContent(){ } $this->log(" e) reading relations ..."); - $relations = DbCorpusRelation::getRelationsBySets2($this->report_ids, + $relations = DbCorpusRelation::getRelationsBySets($this->report_ids, $this->relation_set_ids, $this->relation_type_ids); foreach ($relations as &$relation){ $report_id = $relation['report_id']; diff --git a/engine/templates/page_corpus_export.tpl b/engine/templates/page_corpus_export.tpl index 2c3fb395..a928d070 100644 --- a/engine/templates/page_corpus_export.tpl +++ b/engine/templates/page_corpus_export.tpl @@ -109,6 +109,7 @@