Skip to content

Commit

Permalink
Add 'video/quicktime' to the supported video types.
Browse files Browse the repository at this point in the history
Also refactor and unify Gemini's supported modalities across the VertexAI and GenAI APIs.

PiperOrigin-RevId: 704397144
  • Loading branch information
yifenglou authored and langfun authors committed Dec 9, 2024
1 parent c37e987 commit 1f805bf
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 53 deletions.
70 changes: 26 additions & 44 deletions langfun/core/llms/google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import langfun.core as lf
from langfun.core import modalities as lf_modalities
from langfun.core.llms import vertexai
import pyglove as pg


Expand Down Expand Up @@ -307,71 +308,52 @@ def get(
#


_IMAGE_TYPES = [
'image/png',
'image/jpeg',
'image/webp',
'image/heic',
'image/heif',
]

_AUDIO_TYPES = [
'audio/aac',
'audio/flac',
'audio/mp3',
'audio/m4a',
'audio/mpeg',
'audio/mpga',
'audio/mp4',
'audio/opus',
'audio/pcm',
'audio/wav',
'audio/webm'
]

_VIDEO_TYPES = [
'video/mov',
'video/mpeg',
'video/mpegps',
'video/mpg',
'video/mp4',
'video/webm',
'video/wmv',
'video/x-flv',
'video/3gpp',
]

_PDF = [
'application/pdf',
]


class GeminiExp_20241206(GenAI): # pylint: disable=invalid-name
"""Gemini Experimental model launched on 12/06/2024."""

model = 'gemini-exp-1206'
supported_modalities = _PDF + _IMAGE_TYPES + _AUDIO_TYPES + _VIDEO_TYPES
supported_modalities = (
vertexai.DOCUMENT_TYPES
+ vertexai.IMAGE_TYPES
+ vertexai.AUDIO_TYPES
+ vertexai.VIDEO_TYPES
)


class GeminiExp_20241114(GenAI): # pylint: disable=invalid-name
"""Gemini Experimental model launched on 11/14/2024."""

model = 'gemini-exp-1114'
supported_modalities = _PDF + _IMAGE_TYPES + _AUDIO_TYPES + _VIDEO_TYPES
supported_modalities = (
vertexai.DOCUMENT_TYPES
+ vertexai.IMAGE_TYPES
+ vertexai.AUDIO_TYPES
+ vertexai.VIDEO_TYPES
)


class GeminiPro1_5(GenAI): # pylint: disable=invalid-name
"""Gemini Pro latest model."""

model = 'gemini-1.5-pro-latest'
supported_modalities = _PDF + _IMAGE_TYPES + _AUDIO_TYPES + _VIDEO_TYPES
supported_modalities = (
vertexai.DOCUMENT_TYPES
+ vertexai.IMAGE_TYPES
+ vertexai.AUDIO_TYPES
+ vertexai.VIDEO_TYPES
)


class GeminiFlash1_5(GenAI): # pylint: disable=invalid-name
"""Gemini Flash latest model."""

model = 'gemini-1.5-flash-latest'
supported_modalities = _PDF + _IMAGE_TYPES + _AUDIO_TYPES + _VIDEO_TYPES
supported_modalities = (
vertexai.DOCUMENT_TYPES
+ vertexai.IMAGE_TYPES
+ vertexai.AUDIO_TYPES
+ vertexai.VIDEO_TYPES
)


class GeminiPro(GenAI):
Expand All @@ -384,7 +366,7 @@ class GeminiProVision(GenAI):
"""Gemini Pro vision model."""

model = 'gemini-pro-vision'
supported_modalities = _IMAGE_TYPES + _VIDEO_TYPES
supported_modalities = vertexai.IMAGE_TYPES + vertexai.VIDEO_TYPES


class Palm2(GenAI):
Expand Down
19 changes: 10 additions & 9 deletions langfun/core/llms/vertexai.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,15 +343,15 @@ def _message_from_content_parts(
return lf.AIMessage.from_chunks(chunks)


_IMAGE_TYPES = [
IMAGE_TYPES = [
'image/png',
'image/jpeg',
'image/webp',
'image/heic',
'image/heif',
]

_AUDIO_TYPES = [
AUDIO_TYPES = [
'audio/aac',
'audio/flac',
'audio/mp3',
Expand All @@ -362,10 +362,10 @@ def _message_from_content_parts(
'audio/opus',
'audio/pcm',
'audio/wav',
'audio/webm'
'audio/webm',
]

_VIDEO_TYPES = [
VIDEO_TYPES = [
'video/mov',
'video/mpeg',
'video/mpegps',
Expand All @@ -375,9 +375,10 @@ def _message_from_content_parts(
'video/wmv',
'video/x-flv',
'video/3gpp',
'video/quicktime',
]

_DOCUMENT_TYPES = [
DOCUMENT_TYPES = [
'application/pdf',
'text/plain',
'text/csv',
Expand All @@ -391,8 +392,8 @@ def _message_from_content_parts(
class VertexAIGemini1_5(VertexAI): # pylint: disable=invalid-name
"""Vertex AI Gemini 1.5 model."""

supported_modalities: pg.typing.List(str).freeze( # pytype: disable=invalid-annotation
_DOCUMENT_TYPES + _IMAGE_TYPES + _AUDIO_TYPES + _VIDEO_TYPES
supported_modalities: pg.typing.List(str).freeze( # pytype: disable=invalid-annotation
DOCUMENT_TYPES + IMAGE_TYPES + AUDIO_TYPES + VIDEO_TYPES
)


Expand Down Expand Up @@ -460,8 +461,8 @@ class VertexAIGeminiPro1Vision(VertexAI): # pylint: disable=invalid-name
"""Vertex AI Gemini 1.0 Pro Vision model."""

model = 'gemini-1.0-pro-vision'
supported_modalities: pg.typing.List(str).freeze( # pytype: disable=invalid-annotation
_IMAGE_TYPES + _VIDEO_TYPES
supported_modalities: pg.typing.List(str).freeze( # pytype: disable=invalid-annotation
IMAGE_TYPES + VIDEO_TYPES
)


Expand Down

0 comments on commit 1f805bf

Please sign in to comment.