Spaces:

adil9858
/

dalton_vision

Sleeping

App Files Files Community

adil9858 committed on May 2

Commit

5a2da34

verified ·

1 Parent(s): 9808be5

Create app.py

Browse files

Files changed (1) hide show

app.py +139 -0

app.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import base64
+import io
+import os
+from datetime import datetime
+import gradio as gr
+from openai import OpenAI
+from PIL import Image
+# OpenAI client setup
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key='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'
+)
+def analyze_image(image, prompt):
+    if image is None:
+        return "Please upload or capture an image first."
+    # Convert image to base64
+    buffered = io.BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+    try:
+        response = client.chat.completions.create(
+            model="opengvlab/internvl3-14b:free",
+            messages=[
+                {
+                    "role": "system",
+                    "content": """You are Dalton, an expert AI assistant specialized in image understanding.
+                    Your tasks include:
+                    - Extracting and structuring text from images
+                    - Answering questions about image content
+                    - Providing detailed descriptions
+                    - Analyzing receipts, documents, and other visual content
+                    Be thorough, accurate, and helpful in your responses."""
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{img_str}"
+                            }
+                        }
+                    ]
+                }
+            ],
+            max_tokens=2048
+        )
+        result = response.choices[0].message.content
+        return result
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+# Custom CSS for better mobile experience.
+# The selectors match identifiers used in the layout: #mobile-camera and
+# #prompt-textbox are the elem_id values of the image and prompt inputs,
+# .result-box is the elem_classes value of the result Markdown, and .footer
+# is the class of the footer <div>.
+css = """
+#mobile-camera { width: 100% !important; }
+#prompt-textbox { min-height: 100px !important; }
+.result-box {
+    max-height: 500px;
+    overflow-y: auto;
+    padding: 15px;
+    border: 1px solid #e0e0e0;
+    border-radius: 8px;
+}
+.footer {
+    margin-top: 20px;
+    font-size: 12px;
+    color: #666;
+    text-align: center;
+}
+"""
+# Build the Gradio UI: a header, one row with two columns (inputs in the
+# first, the result in the second), a footer, and the button wiring.
+# Components are laid out in the order they are created inside each context.
+with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
+    # Page header
+    gr.Markdown("""
+    # 🧾 DaltonVision - InternVL3-14B
+    ### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
+    """)
+    with gr.Row():
+        with gr.Column():
+            # Image input section: accepts a file upload or a webcam capture
+            # and hands the handler a PIL image (type="pil").
+            image_input = gr.Image(
+                sources=["upload", "webcam"],
+                type="pil",
+                label="Upload or Capture Image",
+                elem_id="mobile-camera"
+            )
+            # Prompt input, pre-filled with a default instruction
+            prompt_input = gr.Textbox(
+                label="📝 Enter your question or instruction",
+                value="Extract all content structurally",
+                lines=3,
+                elem_id="prompt-textbox"
+            )
+            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
+            # Example prompts; each one targets only the prompt textbox.
+            gr.Examples(
+                examples=[
+                    ["What is the total amount on this receipt?"],
+                    ["List all items and their prices"],
+                    ["Who is the vendor and what is the date?"],
+                    ["Describe this image in detail"]
+                ],
+                inputs=[prompt_input],
+                label="💡 Try these example prompts:"
+            )
+        with gr.Column():
+            # Result output, rendered as Markdown and styled by .result-box
+            result_output = gr.Markdown(
+                label="✅ Analysis Result",
+                elem_classes="result-box"
+            )
+    # Footer
+    # NOTE(review): analyze_image sends the image to the OpenRouter API, so
+    # the "not stored" statement depends on the provider's data policy --
+    # confirm before relying on it.
+    gr.Markdown("""
+    <div class="footer">
+    © 2025 Koshur AI. All rights reserved.<br>
+    Note: Images are processed in real-time and not stored.
+    </div>
+    """)
+    # Button action: run analyze_image on the current image and prompt and
+    # show its return value in the result panel.
+    submit_btn.click(
+        fn=analyze_image,
+        inputs=[image_input, prompt_input],
+        outputs=result_output
+    )
+# Launch the app only when this file is executed as a script, not when it is
+# imported as a module.
+if __name__ == "__main__":
+    demo.launch()