Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow up #37

Merged
merged 11 commits into from
Jan 19, 2025
4 changes: 2 additions & 2 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

- [Docker](https://docs.docker.com/get-docker/) `19.0.3` or later.
- [Docker Compose](https://docs.docker.com/compose/install/) (`docker compose` or `docker-compose`) `2.6.1` or later.
- OpenAI API key (for text generation and embeddings). Get it from [here](https://platform.openai.com/api-keys).
- OpenAI API key (for answer generation and embeddings). Get it from [here](https://platform.openai.com/api-keys).
- Firecrawl API key (for website scraping). Get it from [here](https://www.firecrawl.dev/app/api-keys).

### Quick Install
Expand Down Expand Up @@ -131,4 +131,4 @@ Here's a detailed comparison between Gurubase Cloud and Self-hosted versions:
| Knowledge Base Sources | ✅ Websites, YouTube, PDFs | ✅ Websites, YouTube, PDFs |
| GitHub Codebase Indexing | ✅ Available | ✅ Available |
| Website Widget | ✅ Available | ✅ Available |
| Base LLM | ✅ OpenAI GPT-4o | ✅ OpenAI GPT-4o |
| Base LLM | ✅ OpenAI GPT-4o | ✅ OpenAI GPT-4o |
2 changes: 1 addition & 1 deletion src/gurubase-backend/backend/accounts/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_auth_provider(connection_name):
@auth_auth0
def auth0_user_login(request):
# Example Request Body:
# '{"auth0_id": "auth0|5f7c8ec7c33c6c004bbafe82", "email": "[email protected]", "name": "John Smith", "picture": "http://www.gravatar.com/avatar/?d=identicon", "full_request": {"connection": {"id": "con_fpe5kj482KO1eOzQ", "metadata": {}, "name": "Username-Password-Authentication", "strategy": "auth0"}, "request": {"geoip": {"cityName": "Bellevue", "continentCode": "NA", "countryCode": "US", "countryCode3": "USA", "countryName": "United States of America", "latitude": 47.61793, "longitude": -122.19584, "subdivisionCode": "WA", "subdivisionName": "Washington", "timeZone": "America/Los_Angeles"}, "hostname": "dev-o0e6bx1accmw5dg4.example.com", "ip": "13.33.86.47", "language": "en", "method": "POST", "user_agent": "curl/7.64.1"}, "tenant": {"id": "dev-o0e6bx1accmw5dg4"}, "transaction": {"acr_values": [], "id": "", "locale": "", "login_hint": "[email protected]", "prompt": ["none"], "protocol": "oauth2-access-token", "redirect_uri": "http://someuri.com", "requested_scopes": [], "response_mode": "form_post", "response_type": ["id_token"], "state": "AABBccddEEFFGGTTasrs", "ui_locales": []}, "user": {"app_metadata": {}, "created_at": "2024-11-01T12:33:27.606Z", "email": "[email protected]", "email_verified": true, "family_name": "Smith", "given_name": "John", "last_password_reset": "2024-11-01T12:33:27.606Z", "name": "John Smith", "nickname": "j+smith", "phoneNumber": "123-123-1234", "phone_number": "123-123-1234", "phone_verified": true, "picture": "http://www.gravatar.com/avatar/?d=identicon", "tenant": "dev-o0e6bx1accmw5dg4", "updated_at": "2024-11-01T12:33:27.606Z", "user_id": "auth0|5f7c8ec7c33c6c004bbafe82", "user_metadata": {}, "username": "j+smith"}, "configuration": {}, "secrets": {"AUTH0_MANAGEMENT_API_TOKEN": "ebdcad69aa2f6d6637761debf2f1bec02b023114bad350c3dce0769bd2ebb604", "BACKEND_URL": "https://fatih.ddosify.com/"}}}'
# '{"auth0_id": "auth0|5f7c8ec7c33c6c004bbafe82", "email": "[email protected]", "name": "John Smith", "picture": "http://www.gravatar.com/avatar/?d=identicon", "full_request": {"connection": {"id": "con_fpe5kj482KO1eOzQ", "metadata": {}, "name": "Username-Password-Authentication", "strategy": "auth0"}, "request": {"geoip": {"cityName": "Bellevue", "continentCode": "NA", "countryCode": "US", "countryCode3": "USA", "countryName": "United States of America", "latitude": 47.61793, "longitude": -122.19584, "subdivisionCode": "WA", "subdivisionName": "Washington", "timeZone": "America/Los_Angeles"}, "hostname": "dev-o0e6bx1accmw5dg4.example.com", "ip": "13.33.86.47", "language": "en", "method": "POST", "user_agent": "curl/7.64.1"}, "tenant": {"id": "dev-o0e6bx1accmw5dg4"}, "transaction": {"acr_values": [], "id": "", "locale": "", "login_hint": "[email protected]", "prompt": ["none"], "protocol": "oauth2-access-token", "redirect_uri": "http://someuri.com", "requested_scopes": [], "response_mode": "form_post", "response_type": ["id_token"], "state": "AABBccddEEFFGGTTasrs", "ui_locales": []}, "user": {"app_metadata": {}, "created_at": "2024-11-01T12:33:27.606Z", "email": "[email protected]", "email_verified": true, "family_name": "Smith", "given_name": "John", "last_password_reset": "2024-11-01T12:33:27.606Z", "name": "John Smith", "nickname": "j+smith", "phoneNumber": "123-123-1234", "phone_number": "123-123-1234", "phone_verified": true, "picture": "http://www.gravatar.com/avatar/?d=identicon", "tenant": "dev-o0e6bx1accmw5dg4", "updated_at": "2024-11-01T12:33:27.606Z", "user_id": "auth0|5f7c8ec7c33c6c004bbafe82", "user_metadata": {}, "username": "j+smith"}, "configuration": {}, "secrets": {"AUTH0_MANAGEMENT_API_TOKEN": "ebdcad69aa2f6d6637761debf2f1bec02b023114bad350c3dce0769bd2ebb604", "BACKEND_URL": ""}}}'

request_body = request.data

Expand Down
2 changes: 1 addition & 1 deletion src/gurubase-backend/backend/backend/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@

FOLLOW_UP_QUESTION_LIMIT = config('FOLLOW_UP_QUESTION_LIMIT', default=100, cast=int)
FOLLOW_UP_QUESTION_TIME_LIMIT_SECONDS = config('FOLLOW_UP_QUESTION_TIME_LIMIT_SECONDS', default=7200, cast=int) # 2 hours
GENERATE_FOLLOW_UP_EXAMPLES = config('GENERATE_FOLLOW_UP_EXAMPLES', default=True, cast=bool)
GENERATE_FOLLOW_UP_EXAMPLES = config('GENERATE_FOLLOW_UP_EXAMPLES', default=False, cast=bool)
FOLLOW_UP_EXAMPLE_COUNT = config('FOLLOW_UP_EXAMPLE_COUNT', default=3, cast=int)

BINGE_HISTORY_PAGE_SIZE = config('BINGE_HISTORY_PAGE_SIZE', default=30, cast=int)
Expand Down
46 changes: 46 additions & 0 deletions src/gurubase-backend/backend/core/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,49 @@ def wrapper(request, *args, **kwargs):
request.auth0_id = api_key_obj.user.auth0_id
return view_func(request, *args, **kwargs)
return wrapper


def follow_up_examples_auth(view_func):
    """Apply optional, best-effort authentication to a view.

    The wrapped view is always invoked; this decorator never rejects a
    request. It only annotates ``request`` when credentials check out:

    * If the ``Authorization`` header starts with ``Bearer ``, the remainder
      is treated as a JWT and validated against the Auth0 JWKS. When the
      token is valid and its ``sub`` claim matches a ``User`` row,
      ``request.auth0_id`` and ``request.user`` are set.
    * Independently of the JWT outcome, the raw ``Authorization`` header
      value is passed to ``WidgetId.validate_key``. When that returns a
      truthy object, ``request.widget`` is set to ``True``.

    ``request.widget`` is left unset when no widget key matches, so callers
    should read it defensively (for example with ``hasattr``/``getattr``).

    Args:
        view_func: The view callable to wrap. It is invoked as
            ``view_func(request, *args, **kwargs)``.

    Returns:
        The wrapper callable, which returns whatever ``view_func`` returns.
    """
    bearer_prefix = 'Bearer '

    @wraps(view_func)
    def wrapper(request, *args, **kwargs):
        # Read the header once; both auth strategies inspect the same value.
        auth_header = request.headers.get('Authorization', '')

        # First try JWT auth
        if auth_header.startswith(bearer_prefix):
            # Slice off the prefix instead of splitting on single spaces so
            # that stray whitespace around the token does not yield ''.
            token = auth_header[len(bearer_prefix):].strip()
            try:
                # Fetch Auth0 public keys.
                # NOTE(review): the URL is built by plain concatenation, so
                # this assumes settings.AUTH0_DOMAIN ends with '/' -- confirm.
                jwks_url = f'{settings.AUTH0_DOMAIN}.well-known/jwks.json'
                jwks_client = PyJWKClient(jwks_url)
                signing_key = jwks_client.get_signing_key_from_jwt(token)

                # Decode and validate the token
                payload = jwt.decode(
                    token,
                    signing_key.key,
                    algorithms=['RS256'],
                    audience=settings.AUTH0_AUDIENCE,
                    issuer=settings.AUTH0_DOMAIN,
                )

                # A token without a subject simply authenticates nobody.
                auth0_id = payload.get('sub')
                if auth0_id:
                    user = User.objects.filter(auth0_id=auth0_id).first()
                    if user:
                        request.auth0_id = auth0_id
                        request.user = user
            except Exception as e:
                # Deliberately broad: authentication here is optional, so any
                # failure (bad token, JWKS fetch error, ...) must not block
                # the request.
                logger.debug("JWT validation failed: %s", e)

        # Check for widget ID. The widget key is sent as the raw header value.
        if auth_header:
            widget_id_obj = WidgetId.validate_key(auth_header)
            if widget_id_obj:
                request.widget = True

        # Allow the request to proceed regardless of auth status
        return view_func(request, *args, **kwargs)
    return wrapper
4 changes: 2 additions & 2 deletions src/gurubase-backend/backend/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_anonymous_user_widget_search(self):
guru_type_object=self.guru_type,
binge=None,
slug="widget-question",
include_widget=True
only_widget=True
)
self.assertEqual(result, self.widget_question)

Expand All @@ -98,7 +98,7 @@ def test_anonymous_user_widget_search(self):
guru_type_object=self.guru_type,
binge=None,
slug="regular-question",
include_widget=True
only_widget=True
)
self.assertIsNone(result)

Expand Down
10 changes: 5 additions & 5 deletions src/gurubase-backend/backend/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2553,14 +2553,14 @@ def check_binge_auth(binge, user):
return True
return binge.owner == user

def search_question(user, guru_type_object, binge, slug=None, question=None, will_check_binge_auth=True, include_api=False, include_widget=False):
def search_question(user, guru_type_object, binge, slug=None, question=None, will_check_binge_auth=True, include_api=False, only_widget=False):
def get_source_conditions(user):
"""Helper function to get source conditions based on user"""
if user is None:
# For anonymous users
# API requests are not allowed
# Widget requests are allowed
if include_widget:
if only_widget:
return Q(source__in=[Question.Source.WIDGET_QUESTION.value])
else:
return ~Q(source__in=[Question.Source.API.value, Question.Source.WIDGET_QUESTION.value])
Expand Down Expand Up @@ -2866,7 +2866,7 @@ def api_ask(question: str,
is_widget = api_type == APIType.WIDGET

include_api = api_type == APIType.API
include_widget = api_type == APIType.WIDGET
only_widget = api_type == APIType.WIDGET

question_source = {
APIType.WIDGET: Question.Source.WIDGET_QUESTION.value,
Expand All @@ -2884,7 +2884,7 @@ def api_ask(question: str,
question,
will_check_binge_auth=False,
include_api=include_api,
include_widget=include_widget
only_widget=only_widget
)
if existing_question and not is_question_dirty(existing_question):
logger.info(f"Found existing question {question} for guru type {guru_type.slug}")
Expand All @@ -2902,7 +2902,7 @@ def api_ask(question: str,
question,
will_check_binge_auth=False,
include_api=include_api,
include_widget=include_widget
only_widget=only_widget
)
if existing_question and not is_question_dirty(existing_question):
logger.info(f"Found existing question with slug for {question} in guru type {guru_type.slug}")
Expand Down
27 changes: 21 additions & 6 deletions src/gurubase-backend/backend/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from core.requester import GeminiRequester, OpenAIRequester, RerankerRequester
from core.data_sources import PDFStrategy, WebsiteStrategy, YouTubeStrategy, GitHubRepoStrategy
from core.serializers import WidgetIdSerializer, BingeSerializer, DataSourceSerializer, GuruTypeSerializer, GuruTypeInternalSerializer, QuestionCopySerializer, FeaturedDataSourceSerializer
from core.auth import auth, jwt_auth, combined_auth, stream_combined_auth, api_key_auth
from core.auth import auth, follow_up_examples_auth, jwt_auth, combined_auth, stream_combined_auth, api_key_auth
from core.gcp import replace_media_root_with_nginx_base_url
from core.models import FeaturedDataSource, Question, ContentPageStatistics, QuestionValidityCheckPricing, Summarization, WidgetId, Binge, DataSource, GuruType
from accounts.models import User
Expand Down Expand Up @@ -1058,7 +1058,7 @@ def export_questions(request):


@api_view(['POST'])
@combined_auth
@follow_up_examples_auth
def follow_up_examples(request, guru_type):
user = request.user

Expand All @@ -1070,6 +1070,7 @@ def follow_up_examples(request, guru_type):
binge_id = request.data.get('binge_id')
question_slug = request.data.get('question_slug')
question_text = request.data.get('question')
widget = request.widget if hasattr(request, 'widget') else False

if not question_slug and not question_text:
return Response({'msg': 'Question slug is required'}, status=status.HTTP_400_BAD_REQUEST)
Expand All @@ -1082,7 +1083,7 @@ def follow_up_examples(request, guru_type):
else:
binge = None

if binge and not check_binge_auth(binge, user):
if binge and not widget and not check_binge_auth(binge, user):
return Response({'msg': 'User does not have access to this binge'}, status=status.HTTP_401_UNAUTHORIZED)

guru_type_object = get_guru_type_object(guru_type, only_active=True)
Expand All @@ -1092,7 +1093,9 @@ def follow_up_examples(request, guru_type):
guru_type_object,
binge,
question_slug,
question_text
question_text,
only_widget=widget,
will_check_binge_auth=not widget
)
if not last_question:
return Response({'msg': 'Question does not exist'}, status=status.HTTP_400_BAD_REQUEST)
Expand All @@ -1111,7 +1114,19 @@ def follow_up_examples(request, guru_type):
# Get relevant contexts from the last question
contexts = []
if last_question.processed_ctx_relevances and 'kept' in last_question.processed_ctx_relevances:
contexts = [x['context'] for x in last_question.processed_ctx_relevances['kept']]
for ctx in last_question.processed_ctx_relevances['kept']:
# Skip GitHub repo contexts
try:
# Extract the metadata dict from the first 'Context N' section: it is the text between 'Metadata:\n' and ' Text:' (plain string splits, no regex)
context_parts = ctx['context'].split('\nContext ')
metadata_text = context_parts[1].split(' Text:')[0]
metadata_json = metadata_text.split('Metadata:\n')[1].replace("'", '"')
metadata = json.loads(metadata_json)
if metadata.get('type') == 'GITHUB_REPO':
continue
except (json.JSONDecodeError, IndexError, KeyError):
pass # If we can't parse metadata, include the context
contexts.append(ctx['context'])

if not contexts:
return Response([], status=status.HTTP_200_OK)
Expand Down Expand Up @@ -1324,7 +1339,7 @@ def ask_widget(request):
binge,
parent_slug,
will_check_binge_auth=False,
include_widget=True
only_widget=True
)
except Exception as e:
return response_handler.handle_error_response("Parent question does not exist")
Expand Down
10 changes: 2 additions & 8 deletions src/gurubase-frontend/src/components/Icons.js
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,8 @@ export function LineMdTwitterXAlt(props) {
xmlns="http://www.w3.org/2000/svg"
{...props}>
<path
d="M8.5 2h2.5L11 2h-2.5zM13 2h2.5L15.5 2h-2.5zM10.5 2h5v0h-5zM8.5 2h5v0h-5zM10 2h3.5L13.5 2h-3.5z"
fill="black">
<animate
attributeName="d"
dur="0.8s"
fill="freeze"
keyTimes="0;0.3;0.5;1"
values="M8.5 2h2.5L11 2h-2.5zM13 2h2.5L15.5 2h-2.5zM10.5 2h5v0h-5zM8.5 2h5v0h-5zM10 2h3.5L13.5 2h-3.5z;M8.5 2h2.5L11 22h-2.5zM13 2h2.5L15.5 22h-2.5zM10.5 2h5v2h-5zM8.5 20h5v2h-5zM10 2h3.5L13.5 22h-3.5z;M8.5 2h2.5L11 22h-2.5zM13 2h2.5L15.5 22h-2.5zM10.5 2h5v2h-5zM8.5 20h5v2h-5zM10 2h3.5L13.5 22h-3.5z"></animate>
d="M18.205 2.25h3.308l-7.227 8.26 8.502 11.24H16.13l-5.214-6.817L4.95 21.75H1.64l7.73-8.835L1.215 2.25H8.04l4.713 6.231zm-1.161 17.52h1.833L7.045 4.126H5.078z"
fill="currentColor">
</path>
</svg>
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const MobileOtherGurus = ({ allGuruTypes, isLongGuruName }) => {
}}
onClick={() => setIsPanelOpen(true)}>
<div className="self-stretch my-auto">
{getGuruPromptMap(guruType, allGuruTypes)} Guru
{getGuruPromptMap(guruType, allGuruTypes)}
</div>
<Icon icon="tabler:chevron-down" />
</div>
Expand Down
Loading