From 68a27db355ed440ba1a6fba60fafd1dd5c58b9be Mon Sep 17 00:00:00 2001 From: femalves Date: Wed, 25 Jun 2025 17:39:15 -0400 Subject: [PATCH 01/15] payloads too large first commit --- vault_service/models.py | 1 + vault_service/views/user.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/vault_service/models.py b/vault_service/models.py index 2f0eed9..462ca08 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -80,6 +80,7 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) + get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('true'), default=True) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) diff --git a/vault_service/views/user.py b/vault_service/views/user.py index cc8d2e7..726a480 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -249,6 +249,9 @@ def myads_notifications(myads_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps([output]), 200 @@ -270,6 +273,9 @@ def myads_notifications(myads_id=None): 'data': s.data, 'created': s.created.isoformat(), 'updated': s.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if s.template == 'arxiv' and s.frequency == 'daily': + o['get_other_papers'] = s.get_other_papers output.append(o) return json.dumps(output), 200 @@ -328,6 +334,8 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): frequency=payload.get('frequency')) elif ntype == 'template': + + get_other_papers = False # handles both None values and empty strings if not payload.get('data'): payload['old_data'] = payload.get('data', None) @@ -358,6 +366,8 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): data = payload.get('data', None) stateful = False frequency = payload.get('frequency', 'daily') + if frequency == 'daily': + get_other_papers = payload.get('get_other_papers', True) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: @@ -395,6 +405,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): template=template, classes=classes, scix_ui=scix_ui_header, + get_other_papers=get_other_papers, data=data) else: return json.dumps({'msg': 'Bad data passed; type must be query or template'}), 400 @@ -437,6 +448,9 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -545,6 +559,9 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= setup.active = payload.get('active', setup.active) setup.stateful = payload.get('stateful', setup.stateful) setup.frequency = payload.get('frequency', setup.frequency) + # Only update get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + setup.get_other_papers = payload.get('get_other_papers', setup.get_other_papers) try: session.begin_nested() @@ -573,6 +590,9 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -771,6 +791,9 @@ def get_myads(user_id, start_isodate=None): 'data': s.data, 'created': s.created.isoformat(), 'updated': s.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if s.template == 'arxiv' and s.frequency == 'daily': + o['get_other_papers'] = s.get_other_papers if s.type == 'query': try: From 195233b5946b97461d49cf6ed1f1ce61d81df9b4 Mon Sep 17 00:00:00 2001 From: femalves Date: Wed, 2 Jul 2025 18:09:39 -0400 Subject: [PATCH 02/15] Revert "payloads too large first commit" This reverts commit 68a27db355ed440ba1a6fba60fafd1dd5c58b9be. --- vault_service/models.py | 1 - vault_service/views/user.py | 23 ----------------------- 2 files changed, 24 deletions(-) diff --git a/vault_service/models.py b/vault_service/models.py index 462ca08..2f0eed9 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -80,7 +80,6 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) - get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('true'), default=True) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) diff --git a/vault_service/views/user.py b/vault_service/views/user.py index 726a480..cc8d2e7 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -249,9 +249,6 @@ def myads_notifications(myads_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} - # Only include get_other_papers for daily arXiv notifications - if setup.template == 'arxiv' and setup.frequency == 'daily': - output['get_other_papers'] = setup.get_other_papers return json.dumps([output]), 200 @@ -273,9 +270,6 @@ def myads_notifications(myads_id=None): 'data': s.data, 'created': s.created.isoformat(), 'updated': s.updated.isoformat()} - # Only include get_other_papers for daily arXiv notifications - if s.template == 'arxiv' and s.frequency == 'daily': - o['get_other_papers'] = s.get_other_papers output.append(o) return json.dumps(output), 200 @@ -334,8 +328,6 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): frequency=payload.get('frequency')) elif ntype == 'template': - - get_other_papers = False # handles both None values and empty strings if not payload.get('data'): payload['old_data'] = payload.get('data', None) @@ -366,8 +358,6 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): data = payload.get('data', None) stateful = False frequency = payload.get('frequency', 'daily') - if frequency == 'daily': - get_other_papers = payload.get('get_other_papers', True) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: @@ -405,7 +395,6 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): template=template, classes=classes, scix_ui=scix_ui_header, - get_other_papers=get_other_papers, data=data) else: return json.dumps({'msg': 'Bad data passed; type must be query or template'}), 400 @@ -448,9 +437,6 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} - # Only include get_other_papers for daily arXiv notifications - if setup.template == 'arxiv' and setup.frequency == 'daily': - output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -559,9 +545,6 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= setup.active = payload.get('active', setup.active) setup.stateful = payload.get('stateful', setup.stateful) setup.frequency = payload.get('frequency', setup.frequency) - # Only update get_other_papers for daily arXiv notifications - if setup.template == 'arxiv' and setup.frequency == 'daily': - setup.get_other_papers = payload.get('get_other_papers', setup.get_other_papers) try: session.begin_nested() @@ -590,9 +573,6 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} - # Only include get_other_papers for daily arXiv notifications - if setup.template == 'arxiv' and setup.frequency == 'daily': - output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -791,9 +771,6 @@ def get_myads(user_id, start_isodate=None): 'data': s.data, 'created': s.created.isoformat(), 'updated': s.updated.isoformat()} - # Only include get_other_papers for daily arXiv notifications - if s.template == 'arxiv' and s.frequency == 'daily': - o['get_other_papers'] = s.get_other_papers if s.type == 'query': try: From 06bcba400dd53d4519b4ff669b92100fa16782ab Mon Sep 17 00:00:00 2001 From: femalves Date: Wed, 2 Jul 2025 18:34:04 -0400 Subject: [PATCH 03/15] added all other papers in selected categories flag --- .../f1a2b3c4d5e6_add_get_other_papers_flag.py | 28 ++++++++++++++++ vault_service/models.py | 1 + vault_service/views/user.py | 33 +++++++++++++++---- 3 files changed, 55 insertions(+), 7 deletions(-) create mode 100644 alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py diff --git a/alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py b/alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py new file mode 100644 index 0000000..1aee6bb --- /dev/null +++ b/alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py @@ -0,0 +1,28 @@ +"""Add get_other_papers flag to myads table + +Revision ID: f1a2b3c4d5e6 +Revises: 4cc89f2f896b +Create Date: 2025-01-11 12:00:00.000000 + +""" + +# revision identifiers, used by Alembic. +revision = 'f1a2b3c4d5e6' +down_revision = '4cc89f2f896b' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('true'), nullable=True)) + op.execute("UPDATE myads SET get_other_papers = TRUE WHERE get_other_papers IS NULL") + op.alter_column('myads', 'get_other_papers', nullable=False) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('myads', 'get_other_papers') + # ### end Alembic commands ### \ No newline at end of file diff --git a/vault_service/models.py b/vault_service/models.py index 2f0eed9..462ca08 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -80,6 +80,7 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) + get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('true'), default=True) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) diff --git a/vault_service/views/user.py b/vault_service/views/user.py index cc8d2e7..bd6a3fc 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -295,6 +295,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): return json.dumps({'msg': 'No notification type passed'}), 400 scix_ui_header = current_app.config['SCIXPLORER_HOST'] in request.headers.get('Host', '') + get_other_papers = False with current_app.session_scope() as session: try: @@ -358,6 +359,8 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): data = payload.get('data', None) stateful = False frequency = payload.get('frequency', 'daily') + if frequency == 'daily': + get_other_papers = payload.get('get_other_papers', True) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: @@ -395,6 +398,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): template=template, classes=classes, scix_ui=scix_ui_header, + get_other_papers=get_other_papers, data=data) else: return json.dumps({'msg': 'Bad data passed; type must be query or template'}), 400 @@ -437,6 +441,9 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -545,6 +552,9 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= setup.active = payload.get('active', setup.active) setup.stateful = payload.get('stateful', setup.stateful) setup.frequency = payload.get('frequency', setup.frequency) + # Only update get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + setup.get_other_papers = payload.get('get_other_papers', setup.get_other_papers) try: session.begin_nested() @@ -573,6 +583,9 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications + if setup.template == 'arxiv' and setup.frequency == 'daily': + output['get_other_papers'] = setup.get_other_papers return json.dumps(output), 200 @@ -606,7 +619,7 @@ def execute_myads_query(myads_id): data = setup.data if data is None and setup.query_id: data = _get_general_query_data(session, setup.query_id) - query = _create_myads_query(setup.template, setup.frequency, data, classes=setup.classes) + query = _create_myads_query(setup.template, setup.frequency, data, classes=setup.classes, get_other_papers=setup.get_other_papers) return json.dumps(query) @@ -626,12 +639,13 @@ def _get_general_query_data(session, query_id): data = urlparse.parse_qs(query) return data -def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None): +def _create_myads_query(template_type, frequency, data, classes=None, start_isodate=None, get_other_papers=True): """ Creates a query based on the stored myADS setup (for templated queries only) :param frequency: daily or weekly :param data: keywords or other stored query template data :param classes: arXiv classes, only required for arXiv template queries + :param get_other_papers: for arXiv daily queries, whether to include "other recent papers" query :return: out: list of dicts; constructed query, dates are such that it's meant to be run today: [{q: query params, sort: sort string}] @@ -668,9 +682,14 @@ def _create_myads_query(template_type, frequency, data, classes=None, start_isod classes = 'arxiv_class:(' + ' OR '.join([x + '.*' if '.' not in x else x for x in tmp]) + ')' keywords = data if frequency == 'daily': - connector = [' ', ' NOT '] - # keyword search should be sorted by score, "other recent" should be sorted by bibcode - sort_w_keywords = ['score desc, date desc', 'date desc'] + if get_other_papers: + connector = [' ', ' NOT '] + # keyword search should be sorted by score, "other recent" should be sorted by bibcode + sort_w_keywords = ['score desc, date desc', 'date desc'] + else: + # Only include keyword matches, skip "other recent papers" + connector = [' '] + sort_w_keywords = ['score desc, date desc'] elif frequency == 'weekly': connector = [' '] sort_w_keywords = ['score desc, date desc'] @@ -781,11 +800,11 @@ def get_myads(user_id, start_isodate=None): query = None else: data = _get_general_query_data(session, s.query_id) - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate, get_other_papers=s.get_other_papers) else: qid = None data = s.data - query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate) + query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate, get_other_papers=s.get_other_papers) o['qid'] = qid o['query'] = query From da5042e4462a5414ed5b82d0d5d7f631124a3cc9 Mon Sep 17 00:00:00 2001 From: femalves Date: Mon, 7 Jul 2025 13:08:28 -0400 Subject: [PATCH 04/15] added tests and fixed minor inconsistency --- vault_service/tests/test_user.py | 283 +++++++++++++++++++++++++++++++ vault_service/views/user.py | 11 +- 2 files changed, 291 insertions(+), 3 deletions(-) diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py index 0736cdf..339b609 100644 --- a/vault_service/tests/test_user.py +++ b/vault_service/tests/test_user.py @@ -974,5 +974,288 @@ def test_scixplorer_referrer_updates_all_notifications(self): self.assertTrue(notification2.scix_ui) self.assertTrue(notification3.scix_ui) + @httpretty.activate + def test_get_other_papers_flag_creation(self): + '''Tests creation of arXiv daily notifications with get_other_papers flag''' + httpretty.register_uri( + httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'), + content_type='application/json', + status=200, + body="""{ + "responseHeader":{ + "status":0, "QTime":0, + "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}}, + "response":{"numFound":10456930,"start":0,"docs":[ + { "bibcode":"2005JGRC..110.4002G" }, + { "bibcode":"2005JGRC..110.4003N" }, + { "bibcode":"2005JGRC..110.4004Y" }]}}""") + + # Test 1: Create arXiv daily notification with default get_other_papers=True + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'dark matter', + 'classes': ['astro-ph.CO'], + 'frequency': 'daily'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + self.assertEqual(r.json['frequency'], 'daily') + self.assertEqual(r.json['get_other_papers'], True) # Should default to True + myads_id_default = r.json['id'] + + # Test 2: Create arXiv daily notification with explicit get_other_papers=False + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'black holes', + 'classes': ['astro-ph.HE'], + 'frequency': 'daily', + 'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + self.assertEqual(r.json['frequency'], 'daily') + self.assertEqual(r.json['get_other_papers'], False) + myads_id_false = r.json['id'] + + # Test 3: Create arXiv weekly notification (should not include get_other_papers in response) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'galaxies', + 'classes': ['astro-ph.GA'], + 'frequency': 'weekly'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'arxiv') + self.assertEqual(r.json['frequency'], 'weekly') + self.assertNotIn('get_other_papers', r.json) # Should not be included for weekly + + # Test 4: Create non-arxiv template (should not include get_other_papers) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '100'}, + data=json.dumps({'type': 'template', + 'template': 'keyword', + 'data': 'machine learning'}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['template'], 'keyword') + self.assertNotIn('get_other_papers', r.json) # Should not be included for keyword template + + @httpretty.activate + def test_get_other_papers_flag_editing(self): + '''Tests editing the get_other_papers flag for arXiv daily notifications''' + httpretty.register_uri( + httpretty.GET, self.app.config.get('VAULT_SOLR_QUERY_ENDPOINT'), + content_type='application/json', + status=200, + body="""{ + "responseHeader":{ + "status":0, "QTime":0, + "params":{ "fl":"title,bibcode", "indent":"true", "wt":"json", "q":"*:*"}}, + "response":{"numFound":10456930,"start":0,"docs":[ + { "bibcode":"2005JGRC..110.4002G" }]}}""") + + # Create an arXiv daily notification + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'neutron stars', + 'classes': ['astro-ph.HE'], + 'frequency': 'daily'}), + content_type='application/json') + + self.assertStatus(r, 200) + myads_id = r.json['id'] + self.assertEqual(r.json['get_other_papers'], True) # Default + + # Edit to disable get_other_papers + r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['get_other_papers'], False) + + # Edit to re-enable get_other_papers + r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': True}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertEqual(r.json['get_other_papers'], True) + + # Test editing a weekly arXiv notification (should not affect get_other_papers) + r = self.client.post(url_for('user.myads_notifications'), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'type': 'template', + 'template': 'arxiv', + 'data': 'cosmology', + 'classes': ['astro-ph.CO'], + 'frequency': 'weekly'}), + content_type='application/json') + + self.assertStatus(r, 200) + weekly_myads_id = r.json['id'] + + # Try to edit get_other_papers on weekly notification (should be ignored) + r = self.client.put(url_for('user.myads_notifications', myads_id=weekly_myads_id), + headers={'Authorization': 'secret', 'X-api-uid': '101'}, + data=json.dumps({'get_other_papers': False}), + content_type='application/json') + + self.assertStatus(r, 200) + self.assertNotIn('get_other_papers', r.json) # Should not be included for weekly + + def test_get_other_papers_query_generation(self): + '''Tests that _create_myads_query generates correct queries based on get_other_papers flag''' + from vault_service.views.user import _create_myads_query + + # Test 1: Daily arXiv with keywords, get_other_papers=True (should create 2 queries) + queries = _create_myads_query('arxiv', 'daily', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=True) + + self.assertEqual(len(queries), 2, "Should create 2 queries when get_other_papers=True") + + # First query: keyword matches + self.assertIn('dark matter', queries[0]['q']) + self.assertNotIn(' NOT ', queries[0]['q']) + self.assertEqual(queries[0]['sort'], 'score desc, date desc') + + # Second query: other recent papers (NOT keyword matches) + self.assertIn('dark matter', queries[1]['q']) + self.assertIn(' NOT ', queries[1]['q']) + self.assertEqual(queries[1]['sort'], 'date desc') + + # Test 2: Daily arXiv with keywords, get_other_papers=False (should create 1 query) + queries = _create_myads_query('arxiv', 'daily', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=False) + + self.assertEqual(len(queries), 1, "Should create 1 query when get_other_papers=False") + + # Only query: keyword matches + self.assertIn('dark matter', queries[0]['q']) + self.assertNotIn(' NOT ', queries[0]['q']) + self.assertEqual(queries[0]['sort'], 'score desc, date desc') + + # Test 3: Weekly arXiv (get_other_papers should not affect) + queries_weekly_true = _create_myads_query('arxiv', 'weekly', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=True) + queries_weekly_false = _create_myads_query('arxiv', 'weekly', 'dark matter', + classes=['astro-ph.CO'], get_other_papers=False) + + self.assertEqual(len(queries_weekly_true), 1, "Weekly should always create 1 query") + self.assertEqual(len(queries_weekly_false), 1, "Weekly should always create 1 query") + self.assertEqual(queries_weekly_true[0]['q'], queries_weekly_false[0]['q']) + + # Test 4: Non-arXiv template (get_other_papers should not affect) + queries_keyword = _create_myads_query('keyword', 'weekly', 'machine learning', + get_other_papers=False) + self.assertEqual(len(queries_keyword), 3, "Keyword template should create 3 queries (recent, trending, useful)") + + def test_get_myads_includes_get_other_papers_flag(self): + '''Tests that get_myads endpoint includes get_other_papers flag appropriately''' + with self.app.session_scope() as session: + # Create test data directly in database + user = User(id=102) + session.add(user) + session.commit() + + # Daily arXiv notification with get_other_papers=True + myads_daily_true = MyADS( + user_id=102, + type='template', + name='Daily True', + template='arxiv', + frequency='daily', + active=True, + stateful=False, + classes=['astro-ph.CO'], + data='dark matter', + get_other_papers=True + ) + session.add(myads_daily_true) + + # Daily arXiv notification with get_other_papers=False + myads_daily_false = MyADS( + user_id=102, + type='template', + name='Daily False', + template='arxiv', + frequency='daily', + active=True, + stateful=False, + classes=['astro-ph.HE'], + data='black holes', + get_other_papers=False + ) + session.add(myads_daily_false) + + # Weekly arXiv notification (should not include get_other_papers) + myads_weekly = MyADS( + user_id=102, + type='template', + name='Weekly', + template='arxiv', + frequency='weekly', + active=True, + stateful=False, + classes=['astro-ph.GA'], + data='galaxies', + get_other_papers=False # Not relevant for weekly + ) + session.add(myads_weekly) + + # Keyword notification (should not include get_other_papers) + myads_keyword = MyADS( + user_id=102, + type='template', + name='Keyword', + template='keyword', + frequency='weekly', + active=True, + stateful=False, + data='machine learning', + get_other_papers=False # Not relevant for keyword + ) + session.add(myads_keyword) + + session.commit() + + # Get all myADS setups for user 102 + r = self.client.get(url_for('user.get_myads', user_id='102'), + headers={'Authorization': 'secret'}) + + self.assertStatus(r, 200) + self.assertEqual(len(r.json), 4) + + # # Find each notification and check get_other_papers field + daily_true = next(n for n in r.json if n['name'] == 'Daily True') + daily_false = next(n for n in r.json if n['name'] == 'Daily False') + weekly = next(n for n in r.json if n['name'] == 'Weekly') + keyword = next(n for n in r.json if n['name'] == 'Keyword') + + # # Daily arXiv notifications should include get_other_papers + self.assertIn('get_other_papers', daily_true) + self.assertEqual(daily_true['get_other_papers'], True) + + self.assertIn('get_other_papers', daily_false) + self.assertEqual(daily_false['get_other_papers'], False) + + # # Weekly arXiv and keyword notifications should NOT include get_other_papers + self.assertNotIn('get_other_papers', weekly) + self.assertNotIn('get_other_papers', keyword) + if __name__ == '__main__': unittest.main() diff --git a/vault_service/views/user.py b/vault_service/views/user.py index bd6a3fc..4abebea 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -552,9 +552,10 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= setup.active = payload.get('active', setup.active) setup.stateful = payload.get('stateful', setup.stateful) setup.frequency = payload.get('frequency', setup.frequency) - # Only update get_other_papers for daily arXiv notifications - if setup.template == 'arxiv' and setup.frequency == 'daily': - setup.get_other_papers = payload.get('get_other_papers', setup.get_other_papers) + + # Only update get_other_papers for daily arXiv notifications and if the payload has a value + if setup.template == 'arxiv' and setup.frequency == 'daily' and payload.get('get_other_papers', None) is not None: + setup.get_other_papers = payload.get('get_other_papers') try: session.begin_nested() @@ -583,6 +584,7 @@ def _edit_myads_notification(payload=None, headers=None, user_id=None, myads_id= 'data': setup.data, 'created': setup.created.isoformat(), 'updated': setup.updated.isoformat()} + # Only include get_other_papers for daily arXiv notifications if setup.template == 'arxiv' and setup.frequency == 'daily': output['get_other_papers'] = setup.get_other_papers @@ -804,10 +806,13 @@ def get_myads(user_id, start_isodate=None): else: qid = None data = s.data + if s.template == 'arxiv' and s.frequency == 'daily': + o['get_other_papers'] = s.get_other_papers query = _create_myads_query(s.template, s.frequency, data, classes=s.classes, start_isodate=start_isodate, get_other_papers=s.get_other_papers) o['qid'] = qid o['query'] = query + output.append(o) From 2717ef245ecfcca1bc2978371dd1f949967532f6 Mon Sep 17 00:00:00 2001 From: femalves Date: Wed, 9 Jul 2025 12:56:57 -0400 Subject: [PATCH 05/15] Remove incorrect migration and add correct get_other_papers_flag migration --- ...=> dcda14f51cff_add_get_other_papers_flag.py} | 16 +++++++++++----- config.py | 2 +- vault_service/models.py | 1 + 3 files changed, 13 insertions(+), 6 deletions(-) rename alembic/versions/{f1a2b3c4d5e6_add_get_other_papers_flag.py => dcda14f51cff_add_get_other_papers_flag.py} (69%) diff --git a/alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py b/alembic/versions/dcda14f51cff_add_get_other_papers_flag.py similarity index 69% rename from alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py rename to alembic/versions/dcda14f51cff_add_get_other_papers_flag.py index 1aee6bb..6ca1676 100644 --- a/alembic/versions/f1a2b3c4d5e6_add_get_other_papers_flag.py +++ b/alembic/versions/dcda14f51cff_add_get_other_papers_flag.py @@ -1,20 +1,26 @@ -"""Add get_other_papers flag to myads table +"""Add get_other_papers_flag -Revision ID: f1a2b3c4d5e6 +Revision ID: dcda14f51cff Revises: 4cc89f2f896b -Create Date: 2025-01-11 12:00:00.000000 +Create Date: 2025-07-09 12:36:39.558910 """ # revision identifiers, used by Alembic. -revision = 'f1a2b3c4d5e6' +revision = 'dcda14f51cff' down_revision = '4cc89f2f896b' from alembic import op import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + def upgrade(): + #with app.app_context() as c: + # db.session.add(Model()) + # db.session.commit() + # ### commands auto generated by Alembic - please adjust! ### op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('true'), nullable=True)) op.execute("UPDATE myads SET get_other_papers = TRUE WHERE get_other_papers IS NULL") @@ -25,4 +31,4 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! ### op.drop_column('myads', 'get_other_papers') - # ### end Alembic commands ### \ No newline at end of file + # ### end Alembic commands ### diff --git a/config.py b/config.py index e52b75d..fceddfb 100644 --- a/config.py +++ b/config.py @@ -6,7 +6,7 @@ VAULT_VERSION = 'v0.1' # Arbitrary string identifying the service (will be returned in the headers) -SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://postgres:postgres@localhost:5432/test_vault" +SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://vault:newpassword@localhost:5432/vault" SQLALCHEMY_ECHO = False API_ENDPOINT = 'https://api.adsabs.harvard.edu' diff --git a/vault_service/models.py b/vault_service/models.py index 462ca08..0d488de 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -25,6 +25,7 @@ class User(Base): id = Column(Integer, primary_key=True) name = Column(String(255)) user_data = Column(MutableDict.as_mutable(JSONB)) + library_id = Column(Integer, ForeignKey('library.id'), nullable=True) created = Column(UTCDateTime, default=get_date) updated = Column(UTCDateTime, default=get_date, onupdate=get_date) From 2dd85a67ad6deb0b4aafd75b1bc44299040899a9 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:18:20 -0400 Subject: [PATCH 06/15] changing model and revision --- ...dd_get_other_papers_flag_to_myads_table.py | 76 +++++++++++++++++++ .../dcda14f51cff_add_get_other_papers_flag.py | 34 --------- vault_service/models.py | 2 +- 3 files changed, 77 insertions(+), 35 deletions(-) create mode 100644 alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py delete mode 100644 alembic/versions/dcda14f51cff_add_get_other_papers_flag.py diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py new file mode 100644 index 0000000..572f84a --- /dev/null +++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py @@ -0,0 +1,76 @@ +"""Add get_other_papers flag to myads table + +Revision ID: 717c2970ff42 +Revises: 4cc89f2f896b +Create Date: 2025-07-15 16:14:53.689180 + +""" + +# revision identifiers, used by Alembic. +revision = '717c2970ff42' +down_revision = '4cc89f2f896b' + +from alembic import op +import sqlalchemy as sa + + +from sqlalchemy.dialects import postgresql + +def upgrade(): + #with app.app_context() as c: + # db.session.add(Model()) + # db.session.commit() + + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('false'), nullable=True)) + op.alter_column('myads', 'active', + existing_type=sa.BOOLEAN(), + nullable=True) + op.alter_column('myads', 'frequency', + existing_type=postgresql.ENUM('daily', 'weekly', name='myads_frequency'), + nullable=True) + op.alter_column('myads', 'name', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('myads', 'scix_ui', + existing_type=sa.BOOLEAN(), + nullable=True, + existing_server_default=sa.text('false')) + op.alter_column('myads', 'stateful', + existing_type=sa.BOOLEAN(), + nullable=True) + op.alter_column('myads', 'type', + existing_type=postgresql.ENUM('template', 'query', name='myads_type'), + nullable=True) + op.alter_column('myads', 'user_id', + existing_type=sa.INTEGER(), + nullable=True) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('myads', 'user_id', + existing_type=sa.INTEGER(), + nullable=False) + op.alter_column('myads', 'type', + existing_type=postgresql.ENUM('template', 'query', name='myads_type'), + nullable=False) + op.alter_column('myads', 'stateful', + existing_type=sa.BOOLEAN(), + nullable=False) + op.alter_column('myads', 'scix_ui', + existing_type=sa.BOOLEAN(), + nullable=False, + existing_server_default=sa.text('false')) + op.alter_column('myads', 'name', + existing_type=sa.VARCHAR(), + nullable=False) + op.alter_column('myads', 'frequency', + existing_type=postgresql.ENUM('daily', 'weekly', name='myads_frequency'), + nullable=False) + op.alter_column('myads', 'active', + existing_type=sa.BOOLEAN(), + nullable=False) + op.drop_column('myads', 'get_other_papers') + # ### end Alembic commands ### diff --git a/alembic/versions/dcda14f51cff_add_get_other_papers_flag.py b/alembic/versions/dcda14f51cff_add_get_other_papers_flag.py deleted file mode 100644 index 6ca1676..0000000 --- a/alembic/versions/dcda14f51cff_add_get_other_papers_flag.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Add get_other_papers_flag - -Revision ID: dcda14f51cff -Revises: 4cc89f2f896b -Create Date: 2025-07-09 12:36:39.558910 - -""" - -# revision identifiers, used by Alembic. -revision = 'dcda14f51cff' -down_revision = '4cc89f2f896b' - -from alembic import op -import sqlalchemy as sa - - -from sqlalchemy.dialects import postgresql - -def upgrade(): - #with app.app_context() as c: - # db.session.add(Model()) - # db.session.commit() - - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('true'), nullable=True)) - op.execute("UPDATE myads SET get_other_papers = TRUE WHERE get_other_papers IS NULL") - op.alter_column('myads', 'get_other_papers', nullable=False) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('myads', 'get_other_papers') - # ### end Alembic commands ### diff --git a/vault_service/models.py b/vault_service/models.py index 0d488de..b872da2 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -81,7 +81,7 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) - get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('true'), default=True) + get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) From 3cb6dea56e36067c74af78e3a643693ccaf36955 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:19:43 -0400 Subject: [PATCH 07/15] removing unwanted alter_column commands --- ...dd_get_other_papers_flag_to_myads_table.py | 44 ------------------- 1 file changed, 44 deletions(-) diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py index 572f84a..0c07db6 100644 --- a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py +++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py @@ -23,54 +23,10 @@ def upgrade(): # ### commands auto generated by Alembic - please adjust! ### op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('false'), nullable=True)) - op.alter_column('myads', 'active', - existing_type=sa.BOOLEAN(), - nullable=True) - op.alter_column('myads', 'frequency', - existing_type=postgresql.ENUM('daily', 'weekly', name='myads_frequency'), - nullable=True) - op.alter_column('myads', 'name', - existing_type=sa.VARCHAR(), - nullable=True) - op.alter_column('myads', 'scix_ui', - existing_type=sa.BOOLEAN(), - nullable=True, - existing_server_default=sa.text('false')) - op.alter_column('myads', 'stateful', - existing_type=sa.BOOLEAN(), - nullable=True) - op.alter_column('myads', 'type', - existing_type=postgresql.ENUM('template', 'query', name='myads_type'), - nullable=True) - op.alter_column('myads', 'user_id', - existing_type=sa.INTEGER(), - nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - op.alter_column('myads', 'user_id', - existing_type=sa.INTEGER(), - nullable=False) - op.alter_column('myads', 'type', - existing_type=postgresql.ENUM('template', 'query', name='myads_type'), - nullable=False) - op.alter_column('myads', 'stateful', - existing_type=sa.BOOLEAN(), - nullable=False) - op.alter_column('myads', 'scix_ui', - existing_type=sa.BOOLEAN(), - nullable=False, - existing_server_default=sa.text('false')) - op.alter_column('myads', 'name', - existing_type=sa.VARCHAR(), - nullable=False) - op.alter_column('myads', 'frequency', - existing_type=postgresql.ENUM('daily', 'weekly', name='myads_frequency'), - nullable=False) - op.alter_column('myads', 'active', - existing_type=sa.BOOLEAN(), - nullable=False) op.drop_column('myads', 'get_other_papers') # ### end Alembic commands ### From 60916c3256b85031aa8bc1e09047c99cff3e3da2 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:22:38 -0400 Subject: [PATCH 08/15] setting nullable to False --- .../717c2970ff42_add_get_other_papers_flag_to_myads_table.py | 2 +- vault_service/models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py index 0c07db6..9a6456b 100644 --- a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py +++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py @@ -22,7 +22,7 @@ def upgrade(): # db.session.commit() # ### commands auto generated by Alembic - please adjust! ### - op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('false'), nullable=True)) + op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('false'), nullable=False)) # ### end Alembic commands ### diff --git a/vault_service/models.py b/vault_service/models.py index b872da2..0d7388a 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -81,7 +81,7 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) - get_other_papers = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) + get_other_papers = Column(Boolean, nullable=False, server_default=sa.text('false'), default=False) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) From dbc2486a43a463ee6fa0ed93170899266498f880 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:24:28 -0400 Subject: [PATCH 09/15] Remove config.py from tracking --- config.py | 97 ------------------------------------------------------- 1 file changed, 97 deletions(-) delete mode 100644 config.py diff --git a/config.py b/config.py deleted file mode 100644 index fceddfb..0000000 --- a/config.py +++ /dev/null @@ -1,97 +0,0 @@ -LOG_STDOUT = True - -# must be here for adsmutils to override it using env vars -# but if left empty (resolving to False) it won't be used -SERVICE_TOKEN = None - -VAULT_VERSION = 'v0.1' # Arbitrary string identifying the service (will be returned in the headers) - -SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://vault:newpassword@localhost:5432/vault" -SQLALCHEMY_ECHO = False - -API_ENDPOINT = 'https://api.adsabs.harvard.edu' - -# location of the remote solr-microservice -VAULT_SOLR_QUERY_ENDPOINT = API_ENDPOINT + '/v1/search/query' -VAULT_SOLR_BIGQUERY_ENDPOINT = API_ENDPOINT + '/v1/search/bigquery' - -USER_EMAIL_ADSWS_API_URL = API_ENDPOINT + '/v1/user/%s' - -# alembic will -use_flask_db_url = True - -# a json object holding whatever values we need for the bumblebee -# users; this typically is stored in consul and the microservice -# just exposes it to bbb -VAULT_BUMBLEBEE_OPTIONS = {} - -# limits on the size of the JSON doc stored for user preferences -MAX_ALLOWED_JSON_SIZE = 1000 -MAX_ALLOWED_JSON_KEYS = 100 - -# user_id for anonymous users - fix in deployment config -BOOTSTRAP_USER_ID = 0 - -# import endpoints -HARBOUR_MYADS_IMPORT_ENDPOINT = 'https://api.adsabs.harvard.edu/v1/harbour/myads/classic/%s' -MYADS_DAILY_TIME_RANGE = 2 # days -MYADS_WEEKLY_TIME_RANGE = 6 # days - -# scixplorer host -SCIXPLORER_HOST = 'scixplorer.org' - -# arXiv categories and sub-categories -ALLOWED_ARXIV_CLASSES = ['astro-ph', - 'astro-ph.GA', 'astro-ph.CO', 'astro-ph.EP', 'astro-ph.HE', 'astro-ph.IM', 'astro-ph.SR', - 'cond-mat', - 'cond-mat.dis-nn', 'cond-mat.mtrl-sci', 'cond-mat.mes-hall', 'cond-mat.other', - 'cond-mat.quant-gas', 'cond-mat.soft', 'cond-mat.stat-mech', 'cond-mat.str-el', - 'cond-mat.supr-con', - 'gr-qc', - 'hep-ex', - 'hep-lat', - 'hep-ph', - 'hep-th', - 'math-ph', - 'nlin', - 'nlin.AO', 'nlin.CG', 'nlin.CD', 'nlin.SI', 'nlin.PS', - 'nucl-ex', - 'nucl-th', - 'physics', - 'physics.acc-ph', 'physics.app-ph', 'physics.ao-ph', 'physics.atm-clus', 'physics.atom-ph', - 'physics.bio-ph', 'physics.chem-ph', 'physics.class-ph', 'physics.comp-ph', 'physics.data-an', - 'physics.flu-dyn', 'physics.gen-ph', 'physics.geo-ph', 'physics.hist-ph', 'physics.ins-det', - 'physics.med-ph', 'physics.optics', 'physics.soc-ph', 'physics.ed-ph', 'physics.plasm-ph', - 'physics.pop-ph', 'physics.space-ph', - 'quant-ph', - 'math', - 'math.AG', 'math.AT', 'math.AP', 'math.CT', 'math.CA', 'math.CO', 'math.AC', 'math.CV', - 'math.DG', 'math.DS', 'math.FA', 'math.GM', 'math.GN', 'math.GT', 'math.GR', 'math.HO', - 'math.IT', 'math.KT', 'math.LO', 'math.MP', 'math.MG', 'math.NT', 'math.NA', 'math.OA', - 'math.OC', 'math.PR', 'math.QA', 'math.RT', 'math.RA', 'math.SP', 'math.ST', 'math.SG', - 'cs', - 'cs.AI', 'cs.CL', 'cs.CC', 'cs.CE', 'cs.CG', 'cs.GT', 'cs.CV', 'cs.CY', 'cs.CR', 'cs.DS', - 'cs.DB', 'cs.DL', 'cs.DM', 'cs.DC', 'cs.ET', 'cs.FL', 'cs.GL', 'cs.GR', 'cs.AR', 'cs.HC', - 'cs.IR', 'cs.IT', 'cs.LO', 'cs.LG', 'cs.MS', 'cs.MA', 'cs.MM', 'cs.NI', 'cs.NE', 'cs.NA', - 'cs.OS', 'cs.OH', 'cs.PF', 'cs.PL', 'cs.RO', 'cs.SI', 'cs.SE', 'cs.SD', 'cs.SC', 'cs.SY', - 'q-bio', - 'q-bio.BM', 'q-bio.CB', 'q-bio.GN', 'q-bio.MN', 'q-bio.NC', 'q-bio.OT', 'q-bio.PE', 'q-bio.QM', - 'q-bio.SC', 'q-bio.TO', - 'q-fin', - 'q-fin.CP', 'q-fin.EC', 'q-fin.GN', 'q-fin.MF', 'q-fin.PM', 'q-fin.PR', 'q-fin.RM', 'q-fin.ST', - 'q-fin.TR', - 'stat', - 'stat.AP', 'stat.CO', 'stat.ML', 'stat.ME', 'stat.OT', 'stat.TH', - 'eess', - 'eess.AS', 'eess.IV', 'eess.SP', 'eess.SY', - 'econ', - 'econ.EM', 'econ.GN', 'econ.TH'] - -# harbour db connection for import script -POSTGRES_HARBOUR = { - 'port': 1234, - 'host': 'localhost', - 'user': 'harbour', - 'database': 'harbour', - 'password': 'fix-me' - } From d729d4f6616bb51a77b437abec5917268c953bb8 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:27:44 -0400 Subject: [PATCH 10/15] Restore config.py to repository --- config.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 config.py diff --git a/config.py b/config.py new file mode 100644 index 0000000..fceddfb --- /dev/null +++ b/config.py @@ -0,0 +1,97 @@ +LOG_STDOUT = True + +# must be here for adsmutils to override it using env vars +# but if left empty (resolving to False) it won't be used +SERVICE_TOKEN = None + +VAULT_VERSION = 'v0.1' # Arbitrary string identifying the service (will be returned in the headers) + +SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://vault:newpassword@localhost:5432/vault" +SQLALCHEMY_ECHO = False + +API_ENDPOINT = 'https://api.adsabs.harvard.edu' + +# location of the remote solr-microservice +VAULT_SOLR_QUERY_ENDPOINT = API_ENDPOINT + '/v1/search/query' +VAULT_SOLR_BIGQUERY_ENDPOINT = API_ENDPOINT + '/v1/search/bigquery' + +USER_EMAIL_ADSWS_API_URL = API_ENDPOINT + '/v1/user/%s' + +# alembic will +use_flask_db_url = True + +# a json object holding whatever values we need for the bumblebee +# users; this typically is stored in consul and the microservice +# just exposes it to bbb +VAULT_BUMBLEBEE_OPTIONS = {} + +# limits on the size of the JSON doc stored for user preferences +MAX_ALLOWED_JSON_SIZE = 1000 +MAX_ALLOWED_JSON_KEYS = 100 + +# user_id for anonymous users - fix in deployment config +BOOTSTRAP_USER_ID = 0 + +# import endpoints +HARBOUR_MYADS_IMPORT_ENDPOINT = 'https://api.adsabs.harvard.edu/v1/harbour/myads/classic/%s' +MYADS_DAILY_TIME_RANGE = 2 # days +MYADS_WEEKLY_TIME_RANGE = 6 # days + +# scixplorer host +SCIXPLORER_HOST = 'scixplorer.org' + +# arXiv categories and sub-categories +ALLOWED_ARXIV_CLASSES = ['astro-ph', + 'astro-ph.GA', 'astro-ph.CO', 'astro-ph.EP', 'astro-ph.HE', 'astro-ph.IM', 'astro-ph.SR', + 'cond-mat', + 'cond-mat.dis-nn', 'cond-mat.mtrl-sci', 'cond-mat.mes-hall', 'cond-mat.other', + 'cond-mat.quant-gas', 'cond-mat.soft', 'cond-mat.stat-mech', 'cond-mat.str-el', + 'cond-mat.supr-con', + 'gr-qc', + 'hep-ex', + 'hep-lat', + 'hep-ph', + 'hep-th', + 'math-ph', + 'nlin', + 'nlin.AO', 'nlin.CG', 'nlin.CD', 'nlin.SI', 'nlin.PS', + 'nucl-ex', + 'nucl-th', + 'physics', + 'physics.acc-ph', 'physics.app-ph', 'physics.ao-ph', 'physics.atm-clus', 'physics.atom-ph', + 'physics.bio-ph', 'physics.chem-ph', 'physics.class-ph', 'physics.comp-ph', 'physics.data-an', + 'physics.flu-dyn', 'physics.gen-ph', 'physics.geo-ph', 'physics.hist-ph', 'physics.ins-det', + 'physics.med-ph', 'physics.optics', 'physics.soc-ph', 'physics.ed-ph', 'physics.plasm-ph', + 'physics.pop-ph', 'physics.space-ph', + 'quant-ph', + 'math', + 'math.AG', 'math.AT', 'math.AP', 'math.CT', 'math.CA', 'math.CO', 'math.AC', 'math.CV', + 'math.DG', 'math.DS', 'math.FA', 'math.GM', 'math.GN', 'math.GT', 'math.GR', 'math.HO', + 'math.IT', 'math.KT', 'math.LO', 'math.MP', 'math.MG', 'math.NT', 'math.NA', 'math.OA', + 'math.OC', 'math.PR', 'math.QA', 'math.RT', 'math.RA', 'math.SP', 'math.ST', 'math.SG', + 'cs', + 'cs.AI', 'cs.CL', 'cs.CC', 'cs.CE', 'cs.CG', 'cs.GT', 'cs.CV', 'cs.CY', 'cs.CR', 'cs.DS', + 'cs.DB', 'cs.DL', 'cs.DM', 'cs.DC', 'cs.ET', 'cs.FL', 'cs.GL', 'cs.GR', 'cs.AR', 'cs.HC', + 'cs.IR', 'cs.IT', 'cs.LO', 'cs.LG', 'cs.MS', 'cs.MA', 'cs.MM', 'cs.NI', 'cs.NE', 'cs.NA', + 'cs.OS', 'cs.OH', 'cs.PF', 'cs.PL', 'cs.RO', 'cs.SI', 'cs.SE', 'cs.SD', 'cs.SC', 'cs.SY', + 'q-bio', + 'q-bio.BM', 'q-bio.CB', 'q-bio.GN', 'q-bio.MN', 'q-bio.NC', 'q-bio.OT', 'q-bio.PE', 'q-bio.QM', + 'q-bio.SC', 'q-bio.TO', + 'q-fin', + 'q-fin.CP', 'q-fin.EC', 'q-fin.GN', 'q-fin.MF', 'q-fin.PM', 'q-fin.PR', 'q-fin.RM', 'q-fin.ST', + 'q-fin.TR', + 'stat', + 'stat.AP', 'stat.CO', 'stat.ML', 'stat.ME', 'stat.OT', 'stat.TH', + 'eess', + 'eess.AS', 'eess.IV', 'eess.SP', 'eess.SY', + 'econ', + 'econ.EM', 'econ.GN', 'econ.TH'] + +# harbour db connection for import script +POSTGRES_HARBOUR = { + 'port': 1234, + 'host': 'localhost', + 'user': 'harbour', + 'database': 'harbour', + 'password': 'fix-me' + } From 7a35f69c5ca9a0253cd4de81a3647841bc42456f Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:39:34 -0400 Subject: [PATCH 11/15] Restore config.py (keep locally) --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index fceddfb..e52b75d 100644 --- a/config.py +++ b/config.py @@ -6,7 +6,7 @@ VAULT_VERSION = 'v0.1' # Arbitrary string identifying the service (will be returned in the headers) -SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://vault:newpassword@localhost:5432/vault" +SQLALCHEMY_DATABASE_URI = "postgresql+psycopg2://postgres:postgres@localhost:5432/test_vault" SQLALCHEMY_ECHO = False API_ENDPOINT = 'https://api.adsabs.harvard.edu' From 690b37f8f305b4eca040cae22a7806499f34e470 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 16:40:58 -0400 Subject: [PATCH 12/15] removing library_id from this commit --- vault_service/models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vault_service/models.py b/vault_service/models.py index 0d7388a..72bac34 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -25,7 +25,6 @@ class User(Base): id = Column(Integer, primary_key=True) name = Column(String(255)) user_data = Column(MutableDict.as_mutable(JSONB)) - library_id = Column(Integer, ForeignKey('library.id'), nullable=True) created = Column(UTCDateTime, default=get_date) updated = Column(UTCDateTime, default=get_date, onupdate=get_date) From aa21f7215661cdc479ca28149aeb3a08d4e48953 Mon Sep 17 00:00:00 2001 From: femalves Date: Tue, 15 Jul 2025 17:23:53 -0400 Subject: [PATCH 13/15] changing tests --- vault_service/tests/test_user.py | 26 +++++++++++++------------- vault_service/views/user.py | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py index 339b609..0c1fe7f 100644 --- a/vault_service/tests/test_user.py +++ b/vault_service/tests/test_user.py @@ -990,7 +990,7 @@ def test_get_other_papers_flag_creation(self): { "bibcode":"2005JGRC..110.4003N" }, { "bibcode":"2005JGRC..110.4004Y" }]}}""") - # Test 1: Create arXiv daily notification with default get_other_papers=True + # Test 1: Create arXiv daily notification with default get_other_papers=False r = self.client.post(url_for('user.myads_notifications'), headers={'Authorization': 'secret', 'X-api-uid': '100'}, data=json.dumps({'type': 'template', @@ -1003,10 +1003,10 @@ def test_get_other_papers_flag_creation(self): self.assertStatus(r, 200) self.assertEqual(r.json['template'], 'arxiv') self.assertEqual(r.json['frequency'], 'daily') - self.assertEqual(r.json['get_other_papers'], True) # Should default to True + self.assertEqual(r.json['get_other_papers'], False) # Should default to False myads_id_default = r.json['id'] - # Test 2: Create arXiv daily notification with explicit get_other_papers=False + # Test 2: Create arXiv daily notification with explicit get_other_papers=True r = self.client.post(url_for('user.myads_notifications'), headers={'Authorization': 'secret', 'X-api-uid': '100'}, data=json.dumps({'type': 'template', @@ -1014,13 +1014,13 @@ def test_get_other_papers_flag_creation(self): 'data': 'black holes', 'classes': ['astro-ph.HE'], 'frequency': 'daily', - 'get_other_papers': False}), + 'get_other_papers': True}), content_type='application/json') self.assertStatus(r, 200) self.assertEqual(r.json['template'], 'arxiv') self.assertEqual(r.json['frequency'], 'daily') - self.assertEqual(r.json['get_other_papers'], False) + self.assertEqual(r.json['get_other_papers'], True) myads_id_false = r.json['id'] # Test 3: Create arXiv weekly notification (should not include get_other_papers in response) @@ -1076,25 +1076,25 @@ def test_get_other_papers_flag_editing(self): self.assertStatus(r, 200) myads_id = r.json['id'] - self.assertEqual(r.json['get_other_papers'], True) # Default + self.assertEqual(r.json['get_other_papers'], False) # Default - # Edit to disable get_other_papers + # Edit to enable get_other_papers r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), headers={'Authorization': 'secret', 'X-api-uid': '101'}, - data=json.dumps({'get_other_papers': False}), + data=json.dumps({'get_other_papers': True}), content_type='application/json') self.assertStatus(r, 200) - self.assertEqual(r.json['get_other_papers'], False) + self.assertEqual(r.json['get_other_papers'], True) - # Edit to re-enable get_other_papers + # Edit to disable get_other_papers r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), headers={'Authorization': 'secret', 'X-api-uid': '101'}, - data=json.dumps({'get_other_papers': True}), + data=json.dumps({'get_other_papers': False}), content_type='application/json') self.assertStatus(r, 200) - self.assertEqual(r.json['get_other_papers'], True) + self.assertEqual(r.json['get_other_papers'], False) # Test editing a weekly arXiv notification (should not affect get_other_papers) r = self.client.post(url_for('user.myads_notifications'), @@ -1258,4 +1258,4 @@ def test_get_myads_includes_get_other_papers_flag(self): self.assertNotIn('get_other_papers', keyword) if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/vault_service/views/user.py b/vault_service/views/user.py index 4abebea..6308df5 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -360,7 +360,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): stateful = False frequency = payload.get('frequency', 'daily') if frequency == 'daily': - get_other_papers = payload.get('get_other_papers', True) + get_other_papers = payload.get('get_other_papers', False) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: From 4142f836f0dd900ed46fc4607b7c1845191d5643 Mon Sep 17 00:00:00 2001 From: femalves Date: Fri, 18 Jul 2025 16:30:54 -0400 Subject: [PATCH 14/15] changing default to true --- ...0ff42_add_get_other_papers_flag_to_myads_table.py | 2 +- vault_service/models.py | 4 ++-- vault_service/tests/test_user.py | 12 ++++++------ vault_service/views/user.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py index 9a6456b..d78f5c5 100644 --- a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py +++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py @@ -22,7 +22,7 @@ def upgrade(): # db.session.commit() # ### commands auto generated by Alembic - please adjust! ### - op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('false'), nullable=False)) + op.add_column('myads', sa.Column('get_other_papers', sa.Boolean(), server_default=sa.text('true'), nullable=False)) # ### end Alembic commands ### diff --git a/vault_service/models.py b/vault_service/models.py index 72bac34..d325e6e 100644 --- a/vault_service/models.py +++ b/vault_service/models.py @@ -5,7 +5,7 @@ Models for the users (users) of AdsWS """ -from sqlalchemy import Column, Integer, String, LargeBinary, TIMESTAMP, ForeignKey, Boolean, Text +from sqlalchemy import Column, Integer, String, LargeBinary, ForeignKey, Boolean, Text from sqlalchemy.dialects.postgresql import JSONB, ENUM, ARRAY from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.mutable import MutableDict @@ -80,7 +80,7 @@ class MyADS(Base): name = Column(String) active = Column(Boolean) scix_ui = Column(Boolean, nullable=True, server_default=sa.text('false'), default=False) - get_other_papers = Column(Boolean, nullable=False, server_default=sa.text('false'), default=False) + get_other_papers = Column(Boolean, nullable=False, server_default=sa.text('true'), default=True) stateful = Column(Boolean) frequency = Column(myads_frequency) template = Column(myads_template, nullable=True) diff --git a/vault_service/tests/test_user.py b/vault_service/tests/test_user.py index 4a83df1..fcb8a51 100644 --- a/vault_service/tests/test_user.py +++ b/vault_service/tests/test_user.py @@ -1022,7 +1022,7 @@ def test_get_other_papers_flag_creation(self): { "bibcode":"2005JGRC..110.4003N" }, { "bibcode":"2005JGRC..110.4004Y" }]}}""") - # Test 1: Create arXiv daily notification with default get_other_papers=False + # Test 1: Create arXiv daily notification with default get_other_papers=True r = self.client.post(url_for('user.myads_notifications'), headers={'Authorization': 'secret', 'X-api-uid': '100'}, data=json.dumps({'type': 'template', @@ -1035,10 +1035,10 @@ def test_get_other_papers_flag_creation(self): self.assertStatus(r, 200) self.assertEqual(r.json['template'], 'arxiv') self.assertEqual(r.json['frequency'], 'daily') - self.assertEqual(r.json['get_other_papers'], False) # Should default to False + self.assertEqual(r.json['get_other_papers'], True) # Should default to True myads_id_default = r.json['id'] - # Test 2: Create arXiv daily notification with explicit get_other_papers=True + # Test 2: Create arXiv daily notification with explicit get_other_papers=False r = self.client.post(url_for('user.myads_notifications'), headers={'Authorization': 'secret', 'X-api-uid': '100'}, data=json.dumps({'type': 'template', @@ -1046,13 +1046,13 @@ def test_get_other_papers_flag_creation(self): 'data': 'black holes', 'classes': ['astro-ph.HE'], 'frequency': 'daily', - 'get_other_papers': True}), + 'get_other_papers': False}), content_type='application/json') self.assertStatus(r, 200) self.assertEqual(r.json['template'], 'arxiv') self.assertEqual(r.json['frequency'], 'daily') - self.assertEqual(r.json['get_other_papers'], True) + self.assertEqual(r.json['get_other_papers'], False) myads_id_false = r.json['id'] # Test 3: Create arXiv weekly notification (should not include get_other_papers in response) @@ -1108,7 +1108,7 @@ def test_get_other_papers_flag_editing(self): self.assertStatus(r, 200) myads_id = r.json['id'] - self.assertEqual(r.json['get_other_papers'], False) # Default + self.assertEqual(r.json['get_other_papers'], True) # Default # Edit to enable get_other_papers r = self.client.put(url_for('user.myads_notifications', myads_id=myads_id), diff --git a/vault_service/views/user.py b/vault_service/views/user.py index 4928c29..adea8ef 100644 --- a/vault_service/views/user.py +++ b/vault_service/views/user.py @@ -295,7 +295,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): return json.dumps({'msg': 'No notification type passed'}), 400 scix_ui_header = urlparse.urlparse(request.referrer).netloc in current_app.config.get("NECTAR_REFERRERS", ["dev.scixplorer.org"]) - get_other_papers = False + get_other_papers = True with current_app.session_scope() as session: try: @@ -360,7 +360,7 @@ def _create_myads_notification(payload=None, headers=None, user_id=None): stateful = False frequency = payload.get('frequency', 'daily') if frequency == 'daily': - get_other_papers = payload.get('get_other_papers', False) + get_other_papers = payload.get('get_other_papers', True) if payload.get('data', None): name = '{0} - Recent Papers'.format(get_keyword_query_name(payload['data'])) else: From 9cfb3cd7806167bcebaf1c239d9b4ccfb014fd09 Mon Sep 17 00:00:00 2001 From: femalves Date: Mon, 11 Aug 2025 14:12:02 -0400 Subject: [PATCH 15/15] fixing migrations --- ...dd_get_other_papers_flag_to_myads_table.py | 4 +- ...9_adding_library_id_foreign_key_to_user.py | 73 +++++++++++++++++++ 2 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py diff --git a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py index d78f5c5..98c9e6a 100644 --- a/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py +++ b/alembic/versions/717c2970ff42_add_get_other_papers_flag_to_myads_table.py @@ -1,14 +1,14 @@ """Add get_other_papers flag to myads table Revision ID: 717c2970ff42 -Revises: 4cc89f2f896b +Revises: af63c0205b19 Create Date: 2025-07-15 16:14:53.689180 """ # revision identifiers, used by Alembic. revision = '717c2970ff42' -down_revision = '4cc89f2f896b' +down_revision = 'af63c0205b19' from alembic import op import sqlalchemy as sa diff --git a/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py b/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py new file mode 100644 index 0000000..c8931c0 --- /dev/null +++ b/alembic/versions/af63c0205b19_adding_library_id_foreign_key_to_user.py @@ -0,0 +1,73 @@ +"""Adding library_id foreign key to user + +Revision ID: af63c0205b19 +Revises: 4cc89f2f896b +Create Date: 2025-07-09 15:00:40.189587 + +""" + +# revision identifiers, used by Alembic. +revision = 'af63c0205b19' +down_revision = '4cc89f2f896b' + +from alembic import op +import sqlalchemy as sa +import json + +from sqlalchemy.dialects import postgresql + +def upgrade(): + #with app.app_context() as c: + # db.session.add(Model()) + # db.session.commit() + + # ### commands auto generated by Alembic - please adjust! ### + # Add foreign key constraint to users table + op.add_column('users', sa.Column('library_id', sa.Integer(), nullable=True)) + op.create_foreign_key(None, 'users', 'library', ['library_id'], ['id']) + + + bind = op.get_bind() + # Get all users with link_server in user_data + # Delete link_server from user_data + # But save the link_server in the library table instead to be accessible via users.library_id + + + users = bind.execute("SELECT id, user_data FROM users WHERE user_data ? 'link_server'") + + for user_id, user_data in users: + + if user_data and isinstance(user_data, dict): + link_server = user_data.get('link_server') + if link_server: + library = bind.execute( + "SELECT id FROM library WHERE libserver = %s", (link_server,) + ).fetchone() + + library_id = library[0] if library else None + + new_user_data = user_data.copy() + new_user_data.pop('link_server', None) + + bind.execute( + "UPDATE users SET user_data = %s, library_id = %s WHERE id = %s", + (new_user_data, library_id, user_id) + ) + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + + bind = op.get_bind() + + result = bind.execute("SELECT u.id, u.user_data, l.libserver FROM users u JOIN library l ON u.library_id = l.id WHERE u.library_id IS NOT NULL") + + for user_id, user_data, libserver in result: + user_data = user_data or {} + if isinstance(user_data, dict): + if libserver is not None: + user_data['link_server'] = libserver + bind.execute("UPDATE users SET user_data = %s WHERE id = %s", (json.dumps(user_data), user_id)) + + op.drop_constraint(None, 'users', type_='foreignkey') + op.drop_column('users', 'library_id') + # ### end Alembic commands ###