From 02351a1b888270ae543b8472737be327bbaa7e35 Mon Sep 17 00:00:00 2001 From: Kursat Aktas Date: Sat, 8 Mar 2025 18:40:42 +0300 Subject: [PATCH] Selfhosted fixes (#172) Fix selfhosted guru creation adding multiple repository Fix crawling on guru creation. The guru limit still applies for other crawls Remove unnecessary guru slug arg on crawl stop/status endpoints Fix widget follow up / binge GitHub reindex status update fix Fix empty data source sidebar not resetting the pending urls Add sitemap parse confirmation Fix deleting individual urls on sidebar Fix binge map mobile button not being hidden for bot responses Fix pdf links on selfhosted (ui/widget/slack/api/discord) Fix discord referencing with sources having underscores in their name Fix guru icon update still showing "pending changes" Fix the pdf references in the answer (not the references section) --------- Co-authored-by: aralyekta --- src/gurubase-backend/backend/backend/urls.py | 8 +- .../backend/core/data_sources.py | 38 +++++---- src/gurubase-backend/backend/core/gcp.py | 23 +++--- .../management/commands/discordListener.py | 4 +- .../0060_alter_crawlstate_guru_type.py | 19 +++++ src/gurubase-backend/backend/core/models.py | 2 +- src/gurubase-backend/backend/core/prompts.py | 2 +- .../backend/core/serializers.py | 2 +- src/gurubase-backend/backend/core/tasks.py | 9 ++- src/gurubase-backend/backend/core/utils.py | 4 +- src/gurubase-backend/backend/core/views.py | 16 ++-- src/gurubase-frontend/src/app/actions.js | 8 +- .../src/components/Content/index.js | 5 +- .../src/components/GuruEditPageSidebar.jsx | 4 +- .../NewEditGuru/MonacoUrlEditor.jsx | 58 +++++++++----- ....jsx => ProcessStopConfirmationDialog.jsx} | 37 +++++++-- .../components/NewEditGuru/SourceDialog.jsx | 79 +++++++++++++++---- .../NewEditGuru/UrlTableContent.jsx | 7 +- .../src/components/NewGuru.jsx | 12 ++- .../src/components/WidgetId.js | 21 +++-- src/gurubase-frontend/src/hooks/useCrawler.js | 6 +- 21 files changed, 245 insertions(+), 119 deletions(-) create mode 100644 src/gurubase-backend/backend/core/migrations/0060_alter_crawlstate_guru_type.py rename src/gurubase-frontend/src/components/NewEditGuru/{CrawlStopConfirmationDialog.jsx => ProcessStopConfirmationDialog.jsx} (61%) diff --git a/src/gurubase-backend/backend/backend/urls.py b/src/gurubase-backend/backend/backend/urls.py index b1e99c99..e27045fa 100644 --- a/src/gurubase-backend/backend/backend/urls.py +++ b/src/gurubase-backend/backend/backend/urls.py @@ -75,8 +75,8 @@ path('analytics/', include('analytics.urls')), path('/crawl/start/', core_views.start_crawl_admin, name='start_crawl_admin'), - path('/crawl//stop/', core_views.stop_crawl_admin, name='stop_crawl_admin'), - path('/crawl//status/', core_views.get_crawl_status_admin, name='get_crawl_status_admin'), + path('crawl//stop/', core_views.stop_crawl_admin, name='stop_crawl_admin'), + path('crawl//status/', core_views.get_crawl_status_admin, name='get_crawl_status_admin'), ] if settings.STREAM_ENABLED: @@ -86,10 +86,14 @@ ] if settings.ENV == 'selfhosted': urlpatterns += [ + # Define the urls that are accessed by the selfhosted nginx proxy ('localhost:8029/api/') path('api//answer/', core_views.answer, name="answer-api"), path('api/analytics/', include('analytics.urls')), path('api/widget/ask/', core_views.ask_widget, name='ask_widget_api'), path('api/widget/guru/', core_views.get_guru_visuals, name='get_guru_visuals_api'), + path('api/widget/binge/', core_views.widget_create_binge, name='widget_create_binge_api'), + path('api//follow_up/examples/', core_views.follow_up_examples, name='follow_up_examples_api'), + path('api/slack/events/', core_views.slack_events, name='slack_events_api'), path('settings/', core_views.manage_settings, name='manage_settings'), # New settings endpoint ] diff --git a/src/gurubase-backend/backend/core/data_sources.py b/src/gurubase-backend/backend/core/data_sources.py index 5edc2db8..ede14376 100644 --- a/src/gurubase-backend/backend/core/data_sources.py +++ b/src/gurubase-backend/backend/core/data_sources.py @@ -61,7 +61,6 @@ def youtube_content_extraction(youtube_url): def pdf_content_extraction(pdf_path): try: - pdf_path = replace_media_root_with_nginx_base_url(pdf_path) loader = PyPDFLoader(pdf_path) pages = loader.load() except Exception as e: @@ -563,36 +562,44 @@ def start_crawl(guru_slug, user, url, source=CrawlState.Source.API): return {'msg': 'Invalid URL format'}, 400 user = CrawlService.get_user(user) - guru_type = CrawlService.validate_and_get_guru_type(guru_slug, user) + try: + guru_type = CrawlService.validate_and_get_guru_type(guru_slug, user) + link_limit = guru_type.website_count_limit + except NotFoundError as e: + if source == CrawlState.Source.UI: + guru_type = None + link_limit = 1500 + else: + raise e # Existing crawl start logic - existing_crawl = CrawlState.objects.filter( - guru_type=guru_type, - status=CrawlState.Status.RUNNING - ).first() - if existing_crawl: - return {'msg': 'A crawl is already running for this guru type. Please wait for it to complete or stop it.'}, 400 + if guru_type: + existing_crawl = CrawlState.objects.filter( + guru_type=guru_type, + status=CrawlState.Status.RUNNING + ).first() + if existing_crawl: + return {'msg': 'A crawl is already running for this guru type. Please wait for it to complete or stop it.'}, 400 crawl_state = CrawlState.objects.create( url=url, status=CrawlState.Status.RUNNING, - link_limit=guru_type.website_count_limit, + link_limit=link_limit, guru_type=guru_type, user=user, source=source ) - crawl_website.delay(url, crawl_state.id, guru_type.website_count_limit) + crawl_website.delay(url, crawl_state.id, link_limit) return CrawlStateSerializer(crawl_state).data, 200 @staticmethod - def stop_crawl(guru_slug, user, crawl_id): + def stop_crawl(user, crawl_id): from core.serializers import CrawlStateSerializer user = CrawlService.get_user(user) - guru_type = CrawlService.validate_and_get_guru_type(guru_slug, user) # Existing stop logic try: - crawl_state = CrawlState.objects.get(id=crawl_id, guru_type=guru_type) + crawl_state = CrawlState.objects.get(id=crawl_id) if crawl_state.status == CrawlState.Status.RUNNING: crawl_state.status = CrawlState.Status.STOPPED crawl_state.end_time = datetime.now(UTC) @@ -602,14 +609,13 @@ def stop_crawl(guru_slug, user, crawl_id): return {'msg': 'Crawl not found'}, 404 @staticmethod - def get_crawl_status(guru_slug, user, crawl_id): + def get_crawl_status(user, crawl_id): from core.serializers import CrawlStateSerializer user = CrawlService.get_user(user) - guru_type = CrawlService.validate_and_get_guru_type(guru_slug, user) # Existing status logic try: - crawl_state = CrawlState.objects.get(id=crawl_id, guru_type=guru_type) + crawl_state = CrawlState.objects.get(id=crawl_id) # Update last_polled_at crawl_state.last_polled_at = datetime.now(UTC) crawl_state.save(update_fields=['last_polled_at']) diff --git a/src/gurubase-backend/backend/core/gcp.py b/src/gurubase-backend/backend/core/gcp.py index 39a0e0f7..f6b44eea 100644 --- a/src/gurubase-backend/backend/core/gcp.py +++ b/src/gurubase-backend/backend/core/gcp.py @@ -1,6 +1,6 @@ import logging from django.conf import settings - +import traceback from django.core.files.storage import FileSystemStorage as DjangoFileSystemStorage # storage = GoogleCloudStorage() @@ -8,20 +8,25 @@ def replace_media_root_with_nginx_base_url(url): + # TODO: Update this when selfhosted url setting is added if settings.ENV == 'selfhosted': # Replace also for development environment - url = url.replace(settings.MEDIA_ROOT, f'{settings.NGINX_BASE_URL}/media') - url = url.replace("/workspace/backend/media", f'{settings.NGINX_BASE_URL}/media') - return url + if not url: + logger.error("URL is None", traceback.format_exc()) + return '' + path = url.split(settings.MEDIA_ROOT)[1] + return f'{settings.NGINX_BASE_URL}/media{path}' return url -def replace_media_root_with_localhost(url): +def replace_media_root_with_base_url(url): + # TODO: Update this when selfhosted url setting is added if settings.ENV == 'selfhosted': - port = settings.NGINX_BASE_URL[settings.NGINX_BASE_URL.rfind(":"):][1:] + if not url: + logger.error("URL is None", traceback.format_exc()) + return '' # Replace also for development environment - url = url.replace(settings.MEDIA_ROOT, f'http://localhost:{port}/media') - url = url.replace("/workspace/backend/media", f'http://localhost:{port}/media') - return url + path = url.split(settings.MEDIA_ROOT)[1] + return f'{settings.BASE_URL}/media{path}' return url diff --git a/src/gurubase-backend/backend/core/management/commands/discordListener.py b/src/gurubase-backend/backend/core/management/commands/discordListener.py index 5067ee32..834caaab 100644 --- a/src/gurubase-backend/backend/core/management/commands/discordListener.py +++ b/src/gurubase-backend/backend/core/management/commands/discordListener.py @@ -78,7 +78,7 @@ def format_response(self, response): clean_title ).strip() - formatted_msg.append(f"• [_{clean_title}_](<{ref['link']}>)") + formatted_msg.append(f"• [*{clean_title}*](<{ref['link']}>)") # Add space for frontend link formatted_msg.append(f":eyes: [_View on Gurubase for a better UX_](<{response['question_url']}>)") @@ -453,7 +453,7 @@ async def on_message(message): ' ', clean_title ).strip() - metadata += f"\n• [_{clean_title}_](<{ref['link']}>)" + metadata += f"\n• [*{clean_title}*](<{ref['link']}>)" metadata += f"\n:eyes: [_View on Gurubase for a better UX_](<{response['question_url']}>)" diff --git a/src/gurubase-backend/backend/core/migrations/0060_alter_crawlstate_guru_type.py b/src/gurubase-backend/backend/core/migrations/0060_alter_crawlstate_guru_type.py new file mode 100644 index 00000000..da4ad17f --- /dev/null +++ b/src/gurubase-backend/backend/core/migrations/0060_alter_crawlstate_guru_type.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.18 on 2025-03-04 12:22 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0059_gurucreationform_source'), + ] + + operations = [ + migrations.AlterField( + model_name='crawlstate', + name='guru_type', + field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, to='core.gurutype'), + ), + ] diff --git a/src/gurubase-backend/backend/core/models.py b/src/gurubase-backend/backend/core/models.py index 8497bb4c..19d32fe4 100644 --- a/src/gurubase-backend/backend/core/models.py +++ b/src/gurubase-backend/backend/core/models.py @@ -1650,7 +1650,7 @@ class Source(models.TextChoices): end_time = models.DateTimeField(null=True, blank=True) last_polled_at = models.DateTimeField(auto_now_add=True) link_limit = models.IntegerField(default=1500) - guru_type = models.ForeignKey(GuruType, on_delete=models.CASCADE) + guru_type = models.ForeignKey(GuruType, on_delete=models.CASCADE, null=True, blank=True) user = models.ForeignKey(User, on_delete=models.CASCADE, null=True, blank=True) # null on selfhosted def __str__(self): diff --git a/src/gurubase-backend/backend/core/prompts.py b/src/gurubase-backend/backend/core/prompts.py index b81d272d..30130ece 100644 --- a/src/gurubase-backend/backend/core/prompts.py +++ b/src/gurubase-backend/backend/core/prompts.py @@ -58,7 +58,7 @@ 2. Contexts are not the exact answer, but they are relevant information to answer the question. 3. Highlight critical information in bold for emphasis. 4. Explain concepts whenever possible, being informative and helpful. -5. Provide references and links to sources mentioned in the context links and titles when applicable. Do not reference like "Context 1" or "Context 2". Add references like [Title](link) if applicable. +5. Provide references and links to sources mentioned in the context links and titles when applicable. Do not reference like "Context 1" or "Context 2". Add references like [Title](link) if applicable. However, for pdf files, only refer to the pdf title. 6. Demonstrate concepts with examples when possible. 7. Use code blocks for any code snippets. 8. Use exact names from contexts for functions/classes/methods. diff --git a/src/gurubase-backend/backend/core/serializers.py b/src/gurubase-backend/backend/core/serializers.py index 016e51b9..63e28f1e 100644 --- a/src/gurubase-backend/backend/core/serializers.py +++ b/src/gurubase-backend/backend/core/serializers.py @@ -161,5 +161,5 @@ class Meta: def to_representation(self, instance): repr = super().to_representation(instance) - repr['guru_type'] = instance.guru_type.slug + repr['guru_type'] = instance.guru_type.slug if instance.guru_type else None return repr diff --git a/src/gurubase-backend/backend/core/tasks.py b/src/gurubase-backend/backend/core/tasks.py index 8f7753bb..1607780a 100644 --- a/src/gurubase-backend/backend/core/tasks.py +++ b/src/gurubase-backend/backend/core/tasks.py @@ -1370,14 +1370,14 @@ def process_guru_type(guru_type): if len(structure) > data_source.guru_type.github_file_count_limit_per_repo_hard: raise GithubRepoFileCountLimitError( - f"The codebase exceeds the maximum file limit of {data_source.guru_type.github_file_count_limit_per_repo_hard} files supported." + f"The codebase ({len(structure)}) exceeds the maximum file limit of {data_source.guru_type.github_file_count_limit_per_repo_hard} files supported." ) # Calculate total size total_size = sum(file['size'] for file in structure) if total_size > data_source.guru_type.github_repo_size_limit_mb * 1024 * 1024: raise GithubRepoSizeLimitError( - f"The codebase exceeds the maximum size limit of {data_source.guru_type.github_repo_size_limit_mb} MB supported." + f"The codebase ({total_size / (1024 * 1024):.2f} MB) exceeds the maximum size limit of {data_source.guru_type.github_repo_size_limit_mb} MB supported." ) # Get existing files for this data source @@ -1461,6 +1461,7 @@ def process_guru_type(guru_type): data_source.in_milvus = False data_source.error = "" data_source.user_error = "" + data_source.status = DataSource.Status.SUCCESS data_source.save() data_source.write_to_milvus() @@ -1488,7 +1489,7 @@ def process_guru_type(guru_type): data_source.error = error_msg data_source.status = DataSource.Status.FAIL if data_source.last_successful_index_date: - user_error = f"An issue occurred while reindexing the codebase. The repository has grown beyond our size limit of {data_source.guru_type.github_repo_size_limit_mb} MB. No worries though - this guru still uses the codebase indexed on {data_source.last_successful_index_date.strftime('%B %d')}. Reindexing will be attempted again later." + user_error = f"An issue occurred while reindexing the codebase. The repository size ({total_size / (1024 * 1024):.2f} MB) has grown beyond our size limit of {data_source.guru_type.github_repo_size_limit_mb} MB. No worries though - this guru still uses the codebase indexed on {data_source.last_successful_index_date.strftime('%B %d')}. Reindexing will be attempted again later." else: user_error = str(e) data_source.user_error = user_error @@ -1501,7 +1502,7 @@ def process_guru_type(guru_type): data_source.error = error_msg data_source.status = DataSource.Status.FAIL if data_source.last_successful_index_date: - user_error = f"An issue occurred while reindexing the codebase. The repository has grown beyond our file count limit of {data_source.guru_type.github_file_count_limit_per_repo_hard} files. No worries though - this guru still uses the codebase indexed on {data_source.last_successful_index_date.strftime('%B %d')}. Reindexing will be attempted again later." + user_error = f"An issue occurred while reindexing the codebase. The repository has grown to {len(structure)} files, which exceeds our file count limit of {data_source.guru_type.github_file_count_limit_per_repo_hard} files. No worries though - this guru still uses the codebase indexed on {data_source.last_successful_index_date.strftime('%B %d')}. Reindexing will be attempted again later." else: user_error = str(e) data_source.user_error = user_error diff --git a/src/gurubase-backend/backend/core/utils.py b/src/gurubase-backend/backend/core/utils.py index 571ab163..accb6126 100644 --- a/src/gurubase-backend/backend/core/utils.py +++ b/src/gurubase-backend/backend/core/utils.py @@ -3167,6 +3167,7 @@ def format_date_updated(date_updated: datetime) -> str: return date_updated.strftime('%-d %B %Y') if date_updated else None def format_references(references: list, api: bool = False) -> list: + from core.gcp import replace_media_root_with_base_url processed_references = [] for reference in references: if 'question' in reference and 'link' in reference: @@ -3205,7 +3206,8 @@ def format_references(references: list, api: bool = False) -> list: if settings.ENV == 'selfhosted': for reference in processed_references: if reference['link'] == pdf_data_source.url: - reference['link'] = reference['link'].replace("/workspace/backend", "") + reference['link'] = replace_media_root_with_base_url(reference['link']) + return processed_references diff --git a/src/gurubase-backend/backend/core/views.py b/src/gurubase-backend/backend/core/views.py index f8b8ea35..763e6233 100644 --- a/src/gurubase-backend/backend/core/views.py +++ b/src/gurubase-backend/backend/core/views.py @@ -23,7 +23,7 @@ from core.data_sources import CrawlService from core.serializers import WidgetIdSerializer, BingeSerializer, DataSourceSerializer, GuruTypeSerializer, GuruTypeInternalSerializer, QuestionCopySerializer, FeaturedDataSourceSerializer, APIKeySerializer, DataSourceAPISerializer, SettingsSerializer from core.auth import auth, follow_up_examples_auth, jwt_auth, combined_auth, stream_combined_auth, api_key_auth -from core.gcp import replace_media_root_with_localhost, replace_media_root_with_nginx_base_url +from core.gcp import replace_media_root_with_base_url, replace_media_root_with_nginx_base_url from core.models import CrawlState, FeaturedDataSource, Question, ContentPageStatistics, WidgetId, Binge, DataSource, GuruType, Integration, Thread, APIKey, GuruCreationForm from accounts.models import User from core.utils import ( @@ -1519,7 +1519,7 @@ def get_guru_visuals(request): guru_type = request.guru_type response = { 'colors': guru_type.colors, - 'icon_url': replace_media_root_with_localhost(guru_type.icon_url), + 'icon_url': replace_media_root_with_base_url(guru_type.icon_url), 'name': guru_type.name, 'slug': guru_type.slug, } @@ -2796,10 +2796,9 @@ def start_crawl_api(request, guru_slug): @api_view(['POST']) @jwt_auth -def stop_crawl_admin(request, guru_slug, crawl_id): +def stop_crawl_admin(request, crawl_id): try: data, return_status = CrawlService.stop_crawl( - guru_slug, request.user, crawl_id ) @@ -2811,10 +2810,9 @@ def stop_crawl_admin(request, guru_slug, crawl_id): @api_view(['POST']) @api_key_auth @throttle_classes([ConcurrencyThrottleApiKey]) -def stop_crawl_api(request, guru_slug, crawl_id): +def stop_crawl_api(request, crawl_id): try: data, return_status = CrawlService.stop_crawl( - guru_slug, request.user, crawl_id ) @@ -2825,10 +2823,9 @@ def stop_crawl_api(request, guru_slug, crawl_id): @api_view(['GET']) @jwt_auth -def get_crawl_status_admin(request, guru_slug, crawl_id): +def get_crawl_status_admin(request, crawl_id): try: data, return_status = CrawlService.get_crawl_status( - guru_slug, request.user, crawl_id ) @@ -2840,10 +2837,9 @@ def get_crawl_status_admin(request, guru_slug, crawl_id): @api_view(['GET']) @api_key_auth @throttle_classes([ConcurrencyThrottleApiKey]) -def get_crawl_status_api(request, guru_slug, crawl_id): +def get_crawl_status_api(request, crawl_id): try: data, return_status = CrawlService.get_crawl_status( - guru_slug, request.user, crawl_id ) diff --git a/src/gurubase-frontend/src/app/actions.js b/src/gurubase-frontend/src/app/actions.js index ca0d929b..0ced0728 100644 --- a/src/gurubase-frontend/src/app/actions.js +++ b/src/gurubase-frontend/src/app/actions.js @@ -1152,10 +1152,10 @@ export async function startCrawl(url, guruSlug) { } } -export async function stopCrawl(crawlId, guruSlug) { +export async function stopCrawl(crawlId) { try { const response = await makeAuthenticatedRequest( - `${process.env.NEXT_PUBLIC_BACKEND_FETCH_URL}/${guruSlug}/crawl/${crawlId}/stop/`, + `${process.env.NEXT_PUBLIC_BACKEND_FETCH_URL}/crawl/${crawlId}/stop/`, { method: "POST", headers: { "Content-Type": "application/json" } @@ -1172,10 +1172,10 @@ export async function stopCrawl(crawlId, guruSlug) { } } -export async function getCrawlStatus(crawlId, guruSlug) { +export async function getCrawlStatus(crawlId) { try { const response = await makeAuthenticatedRequest( - `${process.env.NEXT_PUBLIC_BACKEND_FETCH_URL}/${guruSlug}/crawl/${crawlId}/status/`, + `${process.env.NEXT_PUBLIC_BACKEND_FETCH_URL}/crawl/${crawlId}/status/`, { method: "GET", headers: { "Content-Type": "application/json" } diff --git a/src/gurubase-frontend/src/components/Content/index.js b/src/gurubase-frontend/src/components/Content/index.js index 48c9bbd9..ebfd367c 100644 --- a/src/gurubase-frontend/src/components/Content/index.js +++ b/src/gurubase-frontend/src/components/Content/index.js @@ -568,7 +568,7 @@ const Content = (props) => {

Bot Conversation

-

+

This binge is from a conversation on{" "} {finalSource?.charAt(0).toUpperCase() + finalSource?.slice(1).toLowerCase()} @@ -763,7 +763,8 @@ const Content = (props) => { {/* Mobile Binge Map section */} {typeof window !== "undefined" && slug && ( <> - {finalBingeId && + {treeData?.children?.length > 0 && + finalBingeId && !isBingeMapOpen && !isLoading && !streamingStatus && diff --git a/src/gurubase-frontend/src/components/GuruEditPageSidebar.jsx b/src/gurubase-frontend/src/components/GuruEditPageSidebar.jsx index 81d605be..8c7ac60c 100644 --- a/src/gurubase-frontend/src/components/GuruEditPageSidebar.jsx +++ b/src/gurubase-frontend/src/components/GuruEditPageSidebar.jsx @@ -77,7 +77,9 @@ export default function GuruEditPageSidebar({ guruData }) { variant="outline" size="smButtonLgText" className="w-full text-black hover:bg-gray-800 hover:text-white rounded-full" - onClick={() => handleNavigation(`/g/${guruSlug}`)}> + onClick={() => + window.open(`/g/${guruSlug}`, "_blank", "noopener,noreferrer") + }>

Visit Guru { const editorRef = useRef(null); const [sitemapUrl, setSitemapUrl] = useState(""); - const [isLoadingSitemap, setIsLoadingSitemap] = useState(false); const [showSitemapInput, setShowSitemapInput] = useState(false); const [startingCrawl, setStartingCrawl] = useState(false); const [stoppingCrawl, setStoppingCrawl] = useState(false); @@ -127,8 +129,12 @@ const MonacoUrlEditor = ({ } try { - setIsLoadingSitemap(true); + onSitemapLoadingChange(true); const response = await parseSitemapUrls(sitemapUrl); + // If the sitemap loading state was reset (due to stop action), don't process the response + if (!isLoadingSitemapRef.current) { + return; + } if (response.error || response.msg) { CustomToast({ @@ -175,7 +181,7 @@ const MonacoUrlEditor = ({ variant: "error" }); } finally { - setIsLoadingSitemap(false); + onSitemapLoadingChange(false); } }; @@ -197,7 +203,7 @@ const MonacoUrlEditor = ({ } }; - const buttonContent = () => { + const crawlButtonContent = () => { if (!isCrawling) { return (
@@ -317,24 +347,12 @@ const MonacoUrlEditor = ({ onChange={(e) => setSitemapUrl(e.target.value)} />
- + {sitemapButtonContent()}
@@ -55,4 +76,4 @@ const CrawlStopConfirmationDialog = ({ ); }; -export default CrawlStopConfirmationDialog; +export default ProcessStopConfirmationDialog; diff --git a/src/gurubase-frontend/src/components/NewEditGuru/SourceDialog.jsx b/src/gurubase-frontend/src/components/NewEditGuru/SourceDialog.jsx index b4a09b51..7a73479b 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/SourceDialog.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/SourceDialog.jsx @@ -6,7 +6,7 @@ import MonacoUrlEditor from "@/components/NewEditGuru/MonacoUrlEditor"; import { Button } from "@/components/ui/button"; import { cn } from "@/lib/utils"; import { isValidUrl } from "@/utils/common"; -import CrawlStopConfirmationDialog from "@/components/NewEditGuru/CrawlStopConfirmationDialog"; +import ProcessStopConfirmationDialog from "@/components/NewEditGuru/ProcessStopConfirmationDialog"; import { UrlTableContent } from "./UrlTableContent"; @@ -63,19 +63,34 @@ const SourceDialog = React.memo( crawlUrl, setCrawlUrl }) => { - const [showCrawlStopConfirmation, setShowCrawlStopConfirmation] = + const [showStopConfirmation, setShowStopConfirmation] = React.useState(false); const [isClosing, setIsClosing] = React.useState(false); const [stopAction, setStopAction] = React.useState(null); + const [processType, setProcessType] = React.useState("crawling"); + const [isLoadingSitemap, setIsLoadingSitemap] = React.useState(false); + const isLoadingSitemapRef = React.useRef(false); - const handleCrawlStop = React.useCallback((action) => { + const handleProcessStop = React.useCallback((action, type) => { setStopAction(action); - setShowCrawlStopConfirmation(true); + setProcessType(type); + setShowStopConfirmation(true); + }, []); + + // Create a wrapper function to update both state and ref + const updateSitemapLoadingState = React.useCallback((loading) => { + setIsLoadingSitemap(loading); + isLoadingSitemapRef.current = loading; }, []); const handleClose = React.useCallback(async () => { if (isCrawling) { - handleCrawlStop("close"); + handleProcessStop("close", "crawling"); + return; + } + + if (isLoadingSitemap) { + handleProcessStop("close", "sitemap"); return; } @@ -101,7 +116,13 @@ const SourceDialog = React.memo( ...(form.getValues(`${sourceType}Links`) || []), ...newUrls.map((url) => url.url) ]); + } else { + onAddUrls([]); + form.setValue(`${sourceType}Links`, []); } + } else { + onAddUrls([]); + form.setValue(`${sourceType}Links`, []); } // Reset showCrawlInput when closing @@ -118,31 +139,50 @@ const SourceDialog = React.memo( editorContent, form, isCrawling, + isLoadingSitemap, onAddUrls, onOpenChange, sourceType, setShowCrawlInput ]); - const handleConfirmStopCrawl = React.useCallback(async () => { + const handleConfirmStop = React.useCallback(async () => { if (isClosing) return; setIsClosing(true); try { - await onStopCrawl(); + if (processType === "crawling") { + await onStopCrawl(); + } else if (processType === "sitemap") { + // Reset sitemap loading state using the wrapper function + updateSitemapLoadingState(false); + } + await new Promise((resolve) => setTimeout(resolve, 100)); - setShowCrawlStopConfirmation(false); + setShowStopConfirmation(false); if (stopAction === "close") { + // Reset states before closing + if (processType === "crawling") { + setShowCrawlInput(false); + setCrawlUrl(""); + } onOpenChange(false); } } finally { - setShowCrawlInput(false); - setCrawlUrl(""); setIsClosing(false); setStopAction(null); + setProcessType("crawling"); } - }, [onStopCrawl, onOpenChange, stopAction]); + }, [ + onStopCrawl, + onOpenChange, + stopAction, + processType, + setShowCrawlInput, + setCrawlUrl, + updateSitemapLoadingState + ]); const handleDialogClose = React.useCallback( (e) => { @@ -193,11 +233,16 @@ const SourceDialog = React.memo( onChange={onEditorChange} onStartCrawl={onStartCrawl} isCrawling={isCrawling} - onStopCrawl={() => handleCrawlStop("stop")} + onStopCrawl={() => handleProcessStop("stop", "crawling")} showCrawlInput={showCrawlInput} setShowCrawlInput={setShowCrawlInput} crawlUrl={crawlUrl} setCrawlUrl={setCrawlUrl} + isLoadingSitemapRef={isLoadingSitemapRef} + onSitemapLoadingChange={updateSitemapLoadingState} + onStopSitemapLoading={() => + handleProcessStop("stop", "sitemap") + } /> ) : (
@@ -226,13 +271,13 @@ const SourceDialog = React.memo( - ); diff --git a/src/gurubase-frontend/src/components/NewEditGuru/UrlTableContent.jsx b/src/gurubase-frontend/src/components/NewEditGuru/UrlTableContent.jsx index be6013b7..83b89bab 100644 --- a/src/gurubase-frontend/src/components/NewEditGuru/UrlTableContent.jsx +++ b/src/gurubase-frontend/src/components/NewEditGuru/UrlTableContent.jsx @@ -258,7 +258,12 @@ export const UrlTableContent = ({ {/* Add trash icon that appears on hover */}
diff --git a/src/gurubase-frontend/src/components/NewGuru.jsx b/src/gurubase-frontend/src/components/NewGuru.jsx index 8dd4e093..d50d6644 100644 --- a/src/gurubase-frontend/src/components/NewGuru.jsx +++ b/src/gurubase-frontend/src/components/NewGuru.jsx @@ -186,7 +186,7 @@ export default function NewGuru({ guruData, isProcessing }) { return updatedContent; }); - }, guruData?.slug); + }, guruData?.slug || null); // Only initialize Auth0 hooks if in selfhosted mode const isSelfHosted = process.env.NEXT_PUBLIC_NODE_ENV === "selfhosted"; @@ -1016,6 +1016,11 @@ export default function NewGuru({ guruData, isProcessing }) { throw new Error(guruResponse.message); } + if (data.guruLogo instanceof File) { + setSelectedFile(null); + setIconUrl(guruResponse.icon_url || customGuruData?.icon_url); + } + const guruSlug = isEditMode ? customGuru : guruResponse.slug; // Fetch updated guru data after create/update @@ -1969,8 +1974,7 @@ export default function NewGuru({ guruData, isProcessing }) { name="githubRepos" render={({ field }) => { // Ensure we're always using the latest values from the form - const repos = - currentGithubRepos.length > 0 ? currentGithubRepos : field.value; + const repos = field.value; return ( @@ -2244,7 +2248,7 @@ export default function NewGuru({ guruData, isProcessing }) { document.getElementById("logo-upload").click() }> {" "} - {iconUrl ? "Change Logo" : "Upload Logo"} + {isEditMode ? "Change Logo" : "Upload Logo"} We support PNG, JPEG under 1MB. diff --git a/src/gurubase-frontend/src/components/WidgetId.js b/src/gurubase-frontend/src/components/WidgetId.js index 466c58fe..43b68cc9 100644 --- a/src/gurubase-frontend/src/components/WidgetId.js +++ b/src/gurubase-frontend/src/components/WidgetId.js @@ -35,11 +35,10 @@ export default function WidgetModal({ data-widget-id="${widgetId}" data-text="Ask AI" data-margins='{"bottom": "1rem", "right": "1rem"}' - data-light-mode="true"${ - isSelfHosted - ? ` - data-baseUrl="http://localhost:8029/api/" ` - : "" + data-light-mode="true"${isSelfHosted + ? ` + data-baseUrl="http://localhost:8029/api/" ` + : "" } id="guru-widget-id"> `; @@ -139,9 +138,8 @@ export default function WidgetModal({