import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
from threading import Thread, Event
import re
import time
import html

# --- Configuration ---
MODEL_ID = "WeiboAI/VibeThinker-1.5B"
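
# The Stop button sets an Event, but breaking out of the UI streaming loop
# alone would leave model.generate() running in its worker thread. A minimal
# StoppingCriteria (standard transformers API) lets the same Event halt the
# generation loop itself; it is wired into generation_kwargs below.
class StopOnEvent(StoppingCriteria):
    def __init__(self, event: Event):
        self.event = event

    def __call__(self, input_ids, scores, **kwargs):
        # Returning True tells generate() to stop after the current step.
        return self.event.is_set()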

class VibeThinkerModel:
    def __init__(self):
        self.model = None
        self.tokenizer = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.stop_signal = Event()
        
    def load_model(self):
        if self.model is not None: return
        print(f"🔄 Loading {MODEL_ID}...")
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
            self.model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                torch_dtype=torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16,
                device_map="auto",
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )
            print("✅ Model loaded.")
        except Exception as e:
            print(f"❌ Failed to load {MODEL_ID}: {e}")
            raise

    def stop_generation(self):
        self.stop_signal.set()

    def _detect_tail_loop(self, text, min_phrase_len=3, max_phrase_len=10, threshold=20):
        """
        Detects if the generator has gotten stuck in a loop at the END of the text.
        Criteria: A phrase of 3-10 words repeated at least 20 times consecutively.
        """
        words = text.split()
        total_words = len(words)
        
        # We need at least (min_phrase * threshold) words to even check
        if total_words < min_phrase_len * threshold:
            return False

        # Only check the end of the string (optimization)
        # We look at the last (max_phrase * threshold) words
        check_window = max_phrase_len * threshold
        recent_words = words[-check_window:] if total_words > check_window else words
        
        for phrase_len in range(min_phrase_len, max_phrase_len + 1):
            # The candidate phrase is the very last 'phrase_len' words
            candidate_phrase = recent_words[-phrase_len:]
            
            # Construct what the tail SHOULD look like if it's looping
            # e.g. if phrase is "and then", we expect "and then and then..."
            # We check if the tail of the text matches (phrase * threshold)
            
            required_len = phrase_len * threshold
            if len(recent_words) < required_len:
                continue
                
            segment_to_check = recent_words[-required_len:]
            
            # Efficient check: does the segment consist ONLY of the candidate phrase?
            # We compare the segment against the candidate phrase repeated
            expected_segment = candidate_phrase * threshold
            
            if segment_to_check == expected_segment:
                return True
                
        return False
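
    # Illustration (hypothetical strings): a 3-word phrase repeated 25 times
    # trips the detector, ordinary prose does not.
    #   _detect_tail_loop("intro " + "let me check " * 25)   -> True
    #   _detect_tail_loop("a normal, non-repetitive answer") -> False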

    def generate_response_streaming(self, prompt, temperature=0.6, max_new_tokens=32000):
        if not self.model: self.load_model()
        self.stop_signal.clear()
        
        try:
            start_time = time.time()
            
            # Optimized Prompt for VibeThinker
            messages = [
                {"role": "system", "content": "You are an expert algorithm engineer. Analyze the problem deeply, then provide a clean Python solution."},
                {"role": "user", "content": prompt}
            ]
            text_input = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            # With device_map="auto", follow the model's own device placement.
            inputs = self.tokenizer(text_input, return_tensors="pt").to(self.model.device)
            
            streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
            
            generation_kwargs = dict(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=0.95,
                top_k=50,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
                streamer=streamer,
                # Ties the Stop button (and loop detection) to the worker thread.
                stopping_criteria=StoppingCriteriaList([StopOnEvent(self.stop_signal)]),
            )
            
            thread = Thread(target=self.model.generate, kwargs=generation_kwargs)
            thread.start()
            
            generated_text = ""
            loop_detected = False
            
            # Token counter for loop check frequency
            token_count = 0
            
            for new_text in streamer:
                if self.stop_signal.is_set(): break
                
                generated_text += new_text
                token_count += 1
                
                # Check for a runaway loop every 10 chunks to save CPU
                if token_count % 10 == 0:
                    if self._detect_tail_loop(generated_text):
                        loop_detected = True
                        self.stop_signal.set()  # StopOnEvent halts the generate() thread
                        # Flag the loop for the user rather than silently truncating.
                        generated_text += "\n\n[⚠️ Generation stopped: Infinite loop detected]"
                        break

                yield generated_text, {
                    "time": time.time() - start_time,
                    # Streamer chunks are roughly one token each; re-encoding the
                    # full text on every chunk would be quadratic.
                    "tokens": token_count,
                    "generating": True
                }
            
            # The stopping criteria guarantees the worker thread terminates,
            # so it is safe to join unconditionally (even after a manual stop).
            thread.join()
            
            yield generated_text, {
                "time": time.time() - start_time,
                "tokens": len(self.tokenizer.encode(generated_text)),
                "generating": False
            }
            
        except Exception as e:
            # run_gen prefixes "Error:" when stats is None; don't double it here.
            yield str(e), None

vibe_model = VibeThinkerModel()

class ModernUIParser:
    """Parses text into a structured, modern UI"""
    
    def format_code(self, code, lang="python"):
        """Applies basic regex-based HTML syntax highlighting (Python-oriented; `lang` is informational only)."""
        code = html.escape(code)  # turns " into &quot; and ' into &#x27; before spans are inserted
        keywords = r'def|class|return|if|else|elif|for|while|import|from|try|except|with|as|pass|None|True|False'
        builtins = r'print|len|range|enumerate|zip|super|__init__'
        # One pass with named alternatives: sequential re.sub calls would
        # re-match text inside already-inserted <span> tags (e.g. the keyword
        # `class` in class="k"). Strings come first so the `#` in &#x27; is
        # never mistaken for a comment start.
        pattern = re.compile(
            r'(?P<s>&quot;.*?&quot;|&#x27;.*?&#x27;)'
            r'|(?P<c>#[^\n]*)'
            r'|(?P<k>\b(?:' + keywords + r')\b)'
            r'|(?P<nf>\b(?:' + builtins + r')\b)'
        )
        def highlight(match):
            # lastgroup names the alternative that fired; it doubles as the CSS class.
            return f'<span class="{match.lastgroup}">{match.group(0)}</span>'
        return pattern.sub(highlight, code)
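
    # Sketch of the expected output (illustrative, not a test):
    #   parser.format_code("x = 1  # init")
    #   -> 'x = 1  <span class="c"># init</span>'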

    def parse_and_render(self, text, stats):
        # 1. Separate Thinking from Content
        # Heuristic: Content before the first code block or explicit "Solution" header is usually thinking
        thinking = ""
        solution = text
        
        # Find split point
        markers = ["```", "Here is the solution", "### Solution", "Implementation:"]
        first_marker_idx = len(text)
        for m in markers:
            idx = text.find(m)
            if idx != -1 and idx < first_marker_idx:
                first_marker_idx = idx
        
        if first_marker_idx < len(text) and first_marker_idx > 50:
            thinking = text[:first_marker_idx].strip()
            solution = text[first_marker_idx:].strip()
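
        # e.g. (hypothetical) "We can use a stack to track...\n```python ...```"
        # becomes thinking="We can use a stack to track..." with the fenced code
        # as solution, provided the preamble exceeds 50 characters.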
        
        # 2. Process Solution Text (Markdown-ish to HTML)
        # Handle Code Blocks
        parts = re.split(r'(```\w*\n.*?```)', solution, flags=re.DOTALL)
        solution_html = ""
        
        for part in parts:
            if part.startswith('```'):
                # Extract lang and code
                match = re.match(r'```(\w*)\n(.*?)```', part, re.DOTALL)
                if match:
                    lang = match.group(1) or "text"
                    code_content = match.group(2)
                    highlighted = self.format_code(code_content, lang)
                    solution_html += f"""
                    <div class="code-block">
                        <div class="code-header">
                            <span class="lang-tag">{lang}</span>
                            <span class="copy-btn" onclick="navigator.clipboard.writeText(this.parentElement.nextElementSibling.innerText)">Copy</span>
                        </div>
                        <pre>{highlighted}</pre>
                    </div>"""
                else:
                    solution_html += f"<pre>{html.escape(part)}</pre>"
            else:
                # Normal text processing
                clean_text = html.escape(part)
                # Headers
                clean_text = re.sub(r'^### (.*?)$', r'<h3>\1</h3>', clean_text, flags=re.M)
                clean_text = re.sub(r'^## (.*?)$', r'<h2>\1</h2>', clean_text, flags=re.M)
                clean_text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', clean_text)
                # Line breaks
                clean_text = clean_text.replace('\n', '<br>')
                solution_html += f"<div class='text-content'>{clean_text}</div>"

        # 3. Process Thinking
        thinking_html = html.escape(thinking).replace('\n', '<br>')
        
        # 4. Stats & Cursor
        # Guard once; run_gen only calls us with real stats, but default
        # defensively so the pills below can index `stats` directly.
        stats = stats or {"time": 0.0, "tokens": 0, "generating": False}
        is_gen = stats['generating']
        t_sec = stats['tokens'] / stats['time'] if stats['time'] > 0 else 0
        cursor = '<span class="cursor"></span>' if is_gen else ''
        
        # CSS Styles (Modern Dark Theme)
        css = """
        <style>
            :root { --bg: #0f1117; --card: #1e293b; --accent: #6366f1; --text: #e2e8f0; --dim: #94a3b8; }
            .ui-container { font-family: 'Inter', system-ui, sans-serif; color: var(--text); line-height: 1.6; }
            
            /* Stats Bar */
            .stats-bar { display: flex; gap: 15px; margin-bottom: 20px; font-size: 12px; text-transform: uppercase; letter-spacing: 1px; }
            .stat-pill { background: #334155; padding: 4px 10px; border-radius: 20px; color: #cbd5e1; display: flex; align-items: center; gap: 6px; }
            .stat-active { border: 1px solid var(--accent); color: var(--accent); background: rgba(99, 102, 241, 0.1); }
            
            /* Thinking Section */
            details.thinking-box { margin-bottom: 20px; border: 1px solid #312e81; border-radius: 8px; background: rgba(49, 46, 129, 0.1); overflow: hidden; }
            details.thinking-box summary { padding: 12px 16px; cursor: pointer; font-weight: 600; color: #818cf8; list-style: none; outline: none; user-select: none; }
            details.thinking-box summary::-webkit-details-marker { display: none; } /* Safari; list-style: none above covers the rest */
            details.thinking-box summary:hover { background: rgba(49, 46, 129, 0.2); }
            .thought-content { padding: 16px; font-family: 'JetBrains Mono', monospace; font-size: 13px; color: #a5b4fc; border-top: 1px solid #312e81; }
            
            /* Solution Section */
            .solution-box { background: var(--bg); padding: 10px 0; }
            .text-content { margin-bottom: 10px; }
            h2, h3 { color: white; margin-top: 20px; margin-bottom: 10px; font-weight: 600; }
            strong { color: #fff; font-weight: 700; }
            
            /* Code Blocks */
            .code-block { background: #0d1117; border: 1px solid #30363d; border-radius: 8px; margin: 15px 0; overflow: hidden; }
            .code-header { background: #161b22; padding: 6px 12px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #30363d; }
            .lang-tag { font-size: 11px; color: #8b949e; text-transform: uppercase; font-weight: bold; }
            .copy-btn { font-size: 11px; cursor: pointer; color: #58a6ff; }
            .copy-btn:hover { text-decoration: underline; }
            pre { margin: 0; padding: 16px; overflow-x: auto; font-family: 'Fira Code', 'Consolas', monospace; font-size: 14px; color: #c9d1d9; }
            
            /* Syntax Highlighting Colors */
            .k { color: #ff7b72; } /* Keyword */
            .s { color: #a5d6ff; } /* String */
            .c { color: #8b949e; font-style: italic; } /* Comment */
            .nf { color: #d2a8ff; } /* Function */
            
            /* Cursor Animation */
            .cursor { display: inline-block; width: 8px; height: 18px; background: var(--accent); vertical-align: text-bottom; animation: blink 1s step-end infinite; margin-left: 2px; }
            @keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
        </style>
        """
        
        html_out = f"""{css}
        <div class="ui-container">
            <div class="stats-bar">
                <div class="stat-pill {'stat-active' if is_gen else ''}">
                    { '🟢 GENERATING' if is_gen else '⚪ COMPLETE' }
                </div>
                <div class="stat-pill">⏱️ {stats['time']:.1f}s</div>
                <div class="stat-pill">⚡ {t_sec:.1f} T/s</div>
                <div class="stat-pill">📝 {stats['tokens']} Tok</div>
            </div>
        """
        
        if thinking:
            # Open by default if generating, closed if done
            is_open = "open" if is_gen else ""
            html_out += f"""
            <details class="thinking-box" {is_open}>
                <summary>🧠 Chain of Thought (Process)</summary>
                <div class="thought-content">
                    {thinking_html} {cursor if not solution else ''}
                </div>
            </details>
            """
            
        html_out += f"""
            <div class="solution-box">
                {solution_html} {cursor if solution or not thinking else ''}
            </div>
        </div>
        """
        
        return html_out

parser = ModernUIParser()

def run_gen(prompt, temp, max_tokens):
    # `return <value>` inside a generator is silently discarded;
    # the message must be yielded to actually reach the UI.
    if not prompt:
        yield "<div style='color:#94a3b8'>Please enter a prompt.</div>"
        return
    
    gen = vibe_model.generate_response_streaming(prompt, temp, max_tokens)
    for text, stats in gen:
        if stats:
            yield parser.parse_and_render(text, stats)
        else:
            yield f"<div style='color:red'>Error: {html.escape(text)}</div>"

def stop_action():
    vibe_model.stop_generation()

# --- GRADIO INTERFACE ---
with gr.Blocks(
    title="VibeThinker IDE",
    theme=gr.themes.Base(
        primary_hue="indigo",
        neutral_hue="slate",
        font=("Inter", "sans-serif")
    ),
    css=".gradio-container { background-color: #0f1117 !important; border: none; }"
) as demo:
    
    gr.Markdown("""
    <div style="text-align: center; margin-bottom: 20px;">
        <h1 style="color: white; font-size: 2rem;">⚡ VibeThinker IDE</h1>
        <p style="color: #94a3b8;">Specialized 1.5B Model for Algorithms & Competitive Coding</p>
    </div>
    """)
    
    with gr.Row():
        # Left Column: Inputs
        with gr.Column(scale=1, variant="panel"):
            input_text = gr.Textbox(
                label="Problem Statement", 
                lines=8, 
                placeholder="Paste a LeetCode problem or ask for a specific algorithm...",
                elem_id="input-box"
            )
            
            with gr.Accordion("Settings", open=False):
                temp = gr.Slider(0.1, 1.0, value=0.6, label="Temperature")
                tokens = gr.Slider(1024, 32000, value=8192, label="Max Tokens")
            
            with gr.Row():
                btn_run = gr.Button("▶ Run", variant="primary", scale=2)
                btn_stop = gr.Button("⏹ Stop", variant="stop", scale=1)

        # Right Column: Output
        with gr.Column(scale=2):
            out_html = gr.HTML(label="Result Console")
    
    btn_run.click(run_gen, inputs=[input_text, temp, tokens], outputs=out_html)
    btn_stop.click(stop_action, None, None)

    gr.Examples(
        examples=[
            ["Determine if a Sudoku board is valid. Provide a Python solution with O(1) space complexity if possible."],
            ["Explain the Knuth-Morris-Pratt (KMP) algorithm and implement it in Python."],
            ["Solve the 'Trapping Rain Water' problem using the two-pointer approach."],
        ],
        inputs=input_text
    )

if __name__ == "__main__":
    # queue() is required for streaming (generator) outputs on older Gradio
    # releases and is harmless where queuing is already the default.
    demo.queue()
    demo.launch()