Skip to content

Commit

Permalink
fix event data generation for pagination test, update doc strings in …
Browse files Browse the repository at this point in the history
…sync
  • Loading branch information
leslievandemark committed Dec 17, 2024
1 parent 19689fc commit 3be60d7
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 50 deletions.
35 changes: 0 additions & 35 deletions tap_github/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,41 +126,6 @@ def translate_state(state, catalog, repositories):

for key in previous_state_keys:
# Loop through each key of `bookmarks` available in the previous state.

# Case 1:
# Older connections `bookmarks` contain stream names so check if it is the stream name or not.
# If the previous state's key is found in the stream name list then continue to check other keys. Because we want
# to migrate each stream's bookmark into the repo name as mentioned below:
# Example: {`bookmarks`: {`stream_a`: `bookmark_a`}} to {`bookmarks`: {`repo_a`: {`stream_a`: `bookmark_a`}}}

# Case 2:
# Check if the key is available in the list of currently selected repo's list or not. Newer format `bookmarks` contain repo names.
# Return the state if the previous state's key is not found in the repo name list or stream name list.

# If the state contains a bookmark for `repo_a` and `repo_b` and the user deselects both of these repos and adds another repo,
# then this function previously returned an empty state. With this change it returns the existing state instead of the empty state.

# old state
# {
# "bookmarks": {
# "org/test-repo3": {
# "comments": {"since": "2019-01-01T00:00:00Z"}
# }
# }
# }
# for each repo, check each stream under the repo. If the stream is not in stream names or repositories return state.
# stream should always be in stream_names

# new state
# {
# "bookmarks": {
# "comments" : {
# "org/test-repo3": {"since": "2019-01-01T00:00:00Z"},
# },
# }
# }
# for each stream, loop over repos in stream. If the repo is not a stream name (it won't be) or is not in the list of repos, return state. This could happen, and is the case we are checking for. If the repositories are not selected, new ones will get added the new bookmark way.

for inner_key in state['bookmarks'][key].keys():
if inner_key not in stream_names and inner_key not in repositories:
# Return the existing state if all repos from the previous state are deselected(not found) in the current sync.
Expand Down
39 changes: 24 additions & 15 deletions tests/test_github_start_date.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import requests
import json
from tap_tester import connections, runner, LOGGER

from base import TestGithubBase
Expand All @@ -19,33 +20,41 @@ def name():
def generate_data(self):
    """Generate fresh activity on singer-io/test-repo for the 'events' stream.

    Stars and then unstars the repository, then creates an issue and closes
    it again. The token is read from the TAP_GITHUB_TOKEN environment
    variable. If the issue cannot be created (any status other than 201),
    the failure is printed and the close step is skipped, because there is
    no issue number to close.
    """
    # get the token
    token = os.getenv("TAP_GITHUB_TOKEN")
    url = "https://api.github.com/user/starred/singer-io/test-repo"
    headers = {"Authorization": "Bearer {}".format(token)}

    # generate data for the 'events' stream: 'watchEvent', i.e. star the repo
    requests.put(url=url, headers=headers)
    # as per the Documentation: https://docs.github.com/en/developers/webhooks-and-events/events/github-event-types#watchevent
    # the event is generated when we 'star' a repo, hence 'unstar' it so we can 'star' it next time
    requests.delete(url=url, headers=headers)

    url = "https://api.github.com/repos/singer-io/test-repo/issues"
    headers = {"Authorization": "Bearer {}".format(token),
               'Accept': 'application/vnd.github+json'}
    data = {
        "title": "Test Issue",
        "body": "This is a test issue for tap-github pagination test"}

    # create and close an issue to generate new event data
    response = requests.post(url=url, headers=headers, data=json.dumps(data))
    if response.status_code != 201:
        print(f"Failed to create issue: {response.status_code}, {response.text}")
        # Nothing was created, so there is no issue number to close. Returning
        # here avoids referencing an unassigned `issue_number` below.
        return

    issue_number = response.json()['number']

    # Issues are closed by patching their state.
    delete_url = f'https://api.github.com/repos/singer-io/test-repo/issues/{issue_number}'
    delete_data = {'state': 'closed'}
    requests.patch(url=delete_url, headers=headers, data=json.dumps(delete_data))

def test_run(self):
# generate data for 'events' stream
self.generate_data()

date_1 = '2023-04-01T00:00:00Z'
date_2 = '2024-10-08T00:00:00Z'
date_1 = '2020-04-01T00:00:00Z'
date_2 = '2021-10-08T00:00:00Z'
expected_stream_1 = {'commits'}
self.run_test(date_1, date_2, expected_stream_1)

date_2 = '2024-07-13T00:00:00Z'
date_2 = '2022-07-13T00:00:00Z'
expected_stream_2 = {'issue_milestones'}
self.run_test(date_1, date_2, expected_stream_2)

date_2 = '2024-05-06T00:00:00Z'
date_2 = '2022-05-06T00:00:00Z'
expected_stream_3 = {'pr_commits', 'review_comments', 'reviews'}
self.run_test(date_1, date_2, expected_stream_3)

date_2 = '2024-01-27T00:00:00Z'
date_2 = '2022-01-27T00:00:00Z'
expected_stream_4 = self.expected_streams().difference(
expected_stream_1,
expected_stream_2,
Expand All @@ -58,10 +67,10 @@ def test_run(self):
# `issues` doesn't have enough data in this range, so we skip it too
self.run_test(date_1, date_2, expected_stream_4)

date_3 = '2024-01-27T00:00:00Z'
date_3 = '2023-01-27T00:00:00Z'
self.run_test(date_1, date_3, {"issues"})

date_4 = '2024-01-01T00:00:00Z'
date_4 = '2023-01-01T00:00:00Z'
self.run_test(date_1, date_4, {'pull_requests'})

# As per the Documentation: https://docs.github.com/en/rest/reference/activity#events
Expand Down

0 comments on commit 3be60d7

Please sign in to comment.