OpenAI updates

Text-To-Speech


            # The ruby-openai gem is loaded through "openai": the gem name and its
            # require path differ, so requiring "ruby-openai" raises LoadError.
            require "openai"

            # The access token is read from the OPENAI_API_TOKEN environment variable.
            client = OpenAI::Client.new(access_token: ENV['OPENAI_API_TOKEN'])

            # Request speech for the input text with the "tts-1" model and "nova" voice.
            response = client.audio.speech(
              parameters: {
                model: "tts-1",
                input: "Why do Ruby programmers prefer Ruby over any other programming language?\n Because it's a gem!",
                voice: "nova"
              }
            )

            # binwrite stores the response as-is (no newline/encoding translation).
            File.binwrite('joke.mp3', response)
          

Details

  • Very basic
  • Supports many languages
  • Higher-quality HD model available (`tts-1-hd`)
  • 6 different voices
  • ElevenLabs

Transcription API


              # Transcribe joke.mp3 with the "whisper-1" model, requesting the
              # "verbose_json" response format. The audio is opened in binary mode,
              # and the block form closes the file handle once the call has returned.
              response = File.open("joke.mp3", "rb") do |audio_file|
                client.audio.transcribe(
                  parameters: {
                    model: "whisper-1",
                    file: audio_file,
                    response_format: "verbose_json"
                  }
                )
              end
            

              # Sample result hash (presumably the "verbose_json" transcription payload):
              # top-level task/language/duration/text plus a "segments" array whose
              # entries carry per-segment start/end values, text and token ids.
              {
                "task"=>"transcribe",
                "language"=>"english",
                "duration"=>5.539999961853027,
                "text"=>"Why do Ruby programmers prefer Ruby over any other programming language? Because it's a gem.",
                "segments"=>[
                  {"id"=>0, "seek"=>0, "start"=>0.0, "end"=>4.300000190734863, "text"=>" Why do Ruby programmers prefer Ruby over any other programming language?", "tokens"=>[50364, 1545, 360, 19907, 41504, 4382, 19907, 670, 604, 661, 9410, 2856, 30, 50579], "temperature"=>0.0, "avg_logprob"=>-0.3663599491119385, "compression_ratio"=>1.0952380895614624, "no_speech_prob"=>0.0005056043155491352},
                  {"id"=>1, "seek"=>0, "start"=>4.300000190734863, "end"=>5.300000190734863, "text"=>" Because it's a gem.", "tokens"=>[50579, 1436, 309, 311, 257, 7173, 13, 50629], "temperature"=>0.0, "avg_logprob"=>-0.3663599491119385, "compression_ratio"=>1.0952380895614624, "no_speech_prob"=>0.0005056043155491352}
                ]
              }
            

Details

  • VTT support
  • New support for fine-grained segmenting

Chat improvements

Define a function call


          # Definition of the one function passed in the `functions` list:
          # "get_webpage_content" takes a single required string property, "url".
          webpage_content_function = {
            name: "get_webpage_content",
            description: "Gets the content of a webpage",
            parameters: {
              type: :object,
              properties: {
                url: { type: :string, description: "The webpage's url" }
              },
              required: ["url"]
            }
          }

          # Request parameters; the conversation itself is elided on the slide.
          chat_parameters = {
            model: "gpt-3.5-turbo-0613",
            messages: [ ... ],
            functions: [webpage_content_function]
          }

          api_response = client.chat(parameters: chat_parameters)
          

GPT will try to call multiple functions 🤯


          api_response = client.chat(
            parameters: {
              model: "gpt-4",
              messages: [{
                role: "user",
                message: "Can you give me a summary and a page title of the page example.com?"
              }],
              functions: [
              {
                name: "get_webpage_content",
                ...
              },
              {
                name: "get_page_title",
                ...
              }
              ]
            }
          )
          

JSON-Mode


            # JSON mode: the request sets `response_format` to { type: "json_object" }
            # and the system messages additionally instruct the model to answer in JSON.
            # NOTE(review): JSON mode may only be accepted by newer model snapshots
            # than plain "gpt-4" — confirm against the API reference.
            response = client.chat(
              parameters: {
                model: "gpt-4",
                response_format: { type: "json_object" },
                messages: [
                  { role: "system", content: "Please always respond in JSON format." },
                  { role: "system", content: "I will pay you a lot of 💰" },
                  { role: "user", content: "Give me a list of fruits!" }
                ]
              }
            )
          

Assistants

Custom GPTs

GPT store

Questions?