import re
import json
import random
from textblob import TextBlob
import google.generativeai as genai
# speech_recognition removed: using AI-based transcription instead
from pydub import AudioSegment
import os
import shutil
from app.models.topic import ComprehensionQuestion, SpeakingQuestion
from app.models.activity import Activity
from app.models.quiz import VocabularyQuiz, QuizQuestion
from flask import current_app
import logging
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

logger = logging.getLogger(__name__)

# Best-effort fetch of the NLTK 'punkt' data at import time. sent_tokenize and
# word_tokenize are imported above; a failed download must not prevent this
# module from importing, so the error is logged and execution continues.
try:
    nltk.download('punkt', quiet=True)
except Exception as e:
    # `except Exception` (not a bare `except:`) so KeyboardInterrupt and
    # SystemExit still propagate.
    logger.warning(f"Could not download NLTK 'punkt' data: {str(e)}")

class AIAssessment:
    def __init__(self):
        """Set up the lazily-created model slot and point pydub at FFmpeg.

        FFmpeg/FFprobe are looked up first in the Flask config
        (``FFMPEG_PATH`` / ``FFPROBE_PATH``) and, if either is unset or does
        not exist on disk, via ``shutil.which`` on the system PATH. Any error
        during this setup is logged rather than raised, so construction
        always completes.
        """
        # The Gemini model is created on demand by get_model().
        self.model = None

        logger.info("Using TextBlob for grammar checking instead of LanguageTool")

        try:
            # .get() plus truthiness checks: a missing or None config entry
            # must fall through to the PATH search instead of raising inside
            # this try block and skipping the fallback entirely.
            ffmpeg_path = current_app.config.get('FFMPEG_PATH')
            ffprobe_path = current_app.config.get('FFPROBE_PATH')
            configured_ok = bool(
                ffmpeg_path
                and ffprobe_path
                and os.path.exists(ffmpeg_path)
                and os.path.exists(ffprobe_path)
            )

            if configured_ok:
                logger.info(f"FFmpeg found at: {ffmpeg_path}")
                logger.info(f"FFprobe found at: {ffprobe_path}")
            else:
                logger.warning("FFmpeg not found at configured path, searching PATH...")
                ffmpeg_path = shutil.which('ffmpeg')
                ffprobe_path = shutil.which('ffprobe')

                if ffmpeg_path and ffprobe_path:
                    logger.info(f"Found FFmpeg in PATH: {ffmpeg_path}")
                    logger.info(f"Found FFprobe in PATH: {ffprobe_path}")
                else:
                    logger.error("FFmpeg/FFprobe not found in configured paths or in PATH")
                    ffmpeg_path = ffprobe_path = None

            # Configure pydub only when both binaries were located.
            if ffmpeg_path and ffprobe_path:
                AudioSegment.converter = ffmpeg_path
                AudioSegment.ffmpeg = ffmpeg_path
                AudioSegment.ffprobe = ffprobe_path
        except Exception as e:
            logger.error(f"Error setting FFmpeg paths: {str(e)}")
    
    def _configure_genai(self):
        """Configure the Gemini client once and cache the outcome.

        The first call reads ``GEMINI_API_KEY`` from the Flask app config and
        passes it to ``genai.configure``. The result (success or failure) is
        stored on ``self.genai_configured`` and returned unchanged by later
        calls, so a failed attempt is not retried on this instance.

        Returns:
            bool: True if the API was configured, False otherwise.
        """
        if hasattr(self, 'genai_configured'):
            return self.genai_configured

        # .get() so an absent key is handled like an empty one (logged,
        # False returned) instead of raising KeyError into callers.
        api_key = current_app.config.get('GEMINI_API_KEY')

        # Reject empty keys and the default placeholder value.
        if not api_key or api_key == 'your-gemini-api-key-here':
            logger.error("Invalid or missing Gemini API key. Please set a valid GEMINI_API_KEY in your .env file.")
            self.genai_configured = False
            return False

        try:
            genai.configure(api_key=api_key)
        except Exception as e:
            logger.error(f"Error configuring Gemini API: {str(e)}")
            self.genai_configured = False
            return False

        self.genai_configured = True
        logger.debug("Gemini API configured successfully")
        return True
    
    def get_model(self):
        """Return the cached Gemini model, creating it on first use.

        Candidate model names are tried in order of preference. Each one is
        instantiated and sent a short test prompt; the first that succeeds is
        stored on ``self.model`` and returned.

        Returns:
            The initialized model, or None if the API could not be configured
            or every candidate failed.
        """
        # Fast path: a model was already created by an earlier call.
        if self.model is not None:
            return self.model

        if not self._configure_genai():
            logger.error("Failed to configure Gemini API")
            return None

        # Ordered from first choice to last.
        preferred_models = ('gemini-1.5-flash', 'gemini-pro', 'gemini-1.0-pro')

        for model_name in preferred_models:
            try:
                logger.info(f"Attempting to initialize model: {model_name}")
                self.model = genai.GenerativeModel(model_name)

                # Issue a test request; a failure here raises and moves on to
                # the next candidate.
                self.model.generate_content("Hello, world!")
                logger.info(f"Successfully initialized and tested model: {model_name}")
                return self.model
            except Exception as e:
                logger.warning(f"Failed to initialize model {model_name}: {str(e)}")
                # Clear the half-initialized model before the next attempt.
                self.model = None

        # Every candidate failed.
        logger.error("All Gemini models failed to initialize")
        return None
        
    def generate_vocabulary_quiz(self, topic_content, num_questions=5):
        """
        Generate vocabulary quiz questions from difficult words in the topic content

        The model is prompted to return a JSON array of question objects.
        If the API cannot be configured, no model is available, the response
        cannot be parsed, or it yields no usable questions, the result of
        ``_generate_fallback_vocabulary_questions`` is returned instead.

        Args:
            topic_content (str): The text content to extract vocabulary from
            num_questions (int): Number of questions to generate

        Returns:
            list: List of QuizQuestion objects
        """
        try:
            # First, check if the API is properly configured
            if not self._configure_genai():
                logger.error("Failed to configure Gemini API. Using fallback vocabulary quiz.")
                return self._generate_fallback_vocabulary_questions(topic_content, num_questions)

            # Get the model
            model = self.get_model()
            if not model:
                logger.error("Failed to get AI model. Using fallback vocabulary quiz.")
                return self._generate_fallback_vocabulary_questions(topic_content, num_questions)

            # Create the prompt
            prompt = f"""
            Create a vocabulary quiz with {num_questions} questions based on the following text.
            
            IMPORTANT REQUIREMENTS:
            1. Identify {num_questions} difficult or important words from the text
            2. For each word, create an MCQ question asking for its meaning
            3. Provide 4 options for each question, with one correct answer
            4. Include the sentence from the text where the word appears (as context)
            5. Format your response as JSON with the following structure:
            
            [
                {{
                    "word": "example",
                    "context": "This is an example sentence from the text.",
                    "question": "What does 'example' mean in this context?",
                    "correct_answer": "A representative case or instance",
                    "options": [
                        "A representative case or instance", 
                        "A difficult problem to solve", 
                        "A type of examination", 
                        "A mathematical equation"
                    ]
                }},
                // more questions...
            ]
            
            Text: {topic_content}
            """

            # Generate questions
            response = model.generate_content(prompt)
            response_text = response.text.strip()

            # Unwrap a markdown code fence if present (with or without a
            # "json" language tag).
            fenced = re.search(r'```(?:json)?\s*(.*?)\s*```', response_text, re.DOTALL)
            json_str = fenced.group(1) if fenced else response_text

            # Keep only the outermost array: from the first '[' through the
            # LAST ']'. Cutting at the first ']' would truncate the payload at
            # the end of the first nested "options" array and make it
            # unparseable.
            array_start = json_str.find('[')
            array_end = json_str.rfind(']')
            if array_start == -1 or array_end < array_start:
                raise ValueError("No JSON array found in model response")

            questions_data = json.loads(json_str[array_start:array_end + 1])

            questions = []
            for q_data in questions_data[:num_questions]:  # Limit to requested number
                # Ignore array elements that are not question objects.
                if not isinstance(q_data, dict):
                    continue
                word = q_data.get('word', '')
                questions.append(
                    QuizQuestion(
                        word=word,
                        context=q_data.get('context', ''),
                        question_text=q_data.get('question', f"What does '{word}' mean?"),
                        correct_answer=q_data.get('correct_answer', ''),
                        options=json.dumps(q_data.get('options', []))
                    )
                )

            if not questions:
                # Handled by the except below, which switches to the fallback.
                raise ValueError("Model response contained no usable questions")

            return questions

        except Exception as e:
            logger.error(f"Error generating vocabulary quiz: {str(e)}")
            return self._generate_fallback_vocabulary_questions(topic_content, num_questions)

    def _generate_fallback_vocabulary_questions(self, topic_content, num_questions=5):
        """Generate basic vocabulary questions when AI is unavailable

        Candidate words are alphabetic tokens longer than four characters
        that are not in a small stopword set. Each is paired with the first
        sentence that contains it, and up to ``num_questions`` of them are
        chosen at random. Definitions come from a short built-in table;
        other words get a placeholder answer. If anything in that process
        raises, at most three generic placeholder questions are returned.

        Args:
            topic_content (str): Text to draw words and context sentences from
            num_questions (int): Maximum number of questions to produce

        Returns:
            list: List of QuizQuestion objects
        """
        try:
            # Tokenize text into words
            words = word_tokenize(topic_content.lower())

            # Remove common words and duplicates
            stopwords = {'a', 'an', 'the', 'and', 'or', 'but', 'if', 'because', 'as', 'what', 'when', 
                        'where', 'how', 'why', 'is', 'am', 'are', 'was', 'were', 'be', 'been', 'being', 
                        'have', 'has', 'had', 'do', 'does', 'did', 'to', 'at', 'by', 'for', 'with', 
                        'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 
                        'above', 'below', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under'}

            unique_words = {
                word for word in words
                if word.isalpha() and word not in stopwords and len(word) > 4
            }

            # Lowercase every sentence once rather than once per candidate word.
            sentences = [(sentence, sentence.lower()) for sentence in sent_tokenize(topic_content)]
            word_contexts = {}

            for word in unique_words:
                whole_word = re.compile(r'\b' + re.escape(word) + r'\b')
                partial_hit = None
                for sentence, lowered in sentences:
                    if word not in lowered:
                        continue
                    if whole_word.search(lowered):
                        # Preferred: the word appears on its own, not merely
                        # inside a longer word.
                        word_contexts[word] = sentence
                        break
                    if partial_hit is None:
                        partial_hit = sentence
                else:
                    # No whole-word match anywhere; keep the first substring
                    # match so the word is not dropped.
                    if partial_hit is not None:
                        word_contexts[word] = partial_hit

            # Select words that have contexts
            selected_words = list(word_contexts)
            if len(selected_words) > num_questions:
                selected_words = random.sample(selected_words, num_questions)

            # Create basic questions
            questions = []
            common_definitions = {
                'important': 'having great significance or value',
                'different': 'not the same as another or each other',
                'special': 'better, greater, or otherwise different from what is usual',
                'example': 'a thing characteristic of its kind',
                'learning': 'the acquisition of knowledge or skills',
                'specific': 'clearly defined or identified',
                'general': 'affecting or concerning all or most people, places, or things',
                'complete': 'having all the necessary or appropriate parts',
                'various': 'of different kinds or sorts',
                'similar': 'resembling without being identical',
            }

            for word in selected_words:
                # Use a real definition when one is known, else a placeholder.
                correct_answer = common_definitions.get(word, f"The meaning of '{word}'")

                # Placeholder distractors; shuffled so the correct answer is
                # not always in the same position.
                options = [
                    correct_answer,
                    f"The opposite of '{word}'",
                    f"A type of {word}",
                    f"Related to {word} but different"
                ]
                random.shuffle(options)

                questions.append(
                    QuizQuestion(
                        word=word,
                        context=word_contexts[word],
                        question_text=f"What does '{word}' mean?",
                        correct_answer=correct_answer,
                        options=json.dumps(options)
                    )
                )

            return questions

        except Exception as e:
            logger.error(f"Error generating fallback vocabulary questions: {str(e)}")

            # Return extremely basic questions if all else fails
            questions = []
            for i in range(min(num_questions, 3)):
                word = f"word{i+1}"
                question = QuizQuestion(
                    word=word,
                    context=f"This is a sentence with {word}.",
                    question_text=f"What does '{word}' mean?",
                    correct_answer=f"Definition of {word}",
                    options=json.dumps([f"Definition of {word}", f"Not {word}", f"Similar to {word}", f"Opposite of {word}"])
                )
                questions.append(question)

            return questions
            
    def validate_word_count(self, text, min_words=3, max_words=150):
        """Normalize whitespace in ``text`` and return the result.

        Despite the name, no word-count limit is applied: ``min_words`` and
        ``max_words`` are accepted but not used. Every run of whitespace is
        collapsed to a single space and the ends are trimmed.

        Returns:
            str: The cleaned text, or "" when ``text`` is empty or None.
        """
        if text:
            # Collapse whitespace runs first, then trim the ends.
            collapsed = re.sub(r'\s+', ' ', text)
            return collapsed.strip()
        return ""