Skip to content
Merged
2 changes: 2 additions & 0 deletions lambda-http/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ serde = { version = "^1", features = ["derive"] }
serde_json = "^1"
serde_urlencoded = "0.7.0"
query_map = { version = "0.5", features = ["url-query"] }
mime = "0.3.16"
encoding_rs = "0.8.31"

[dependencies.aws_lambda_events]
version = "^0.6.3"
Expand Down
45 changes: 40 additions & 5 deletions lambda-http/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use aws_lambda_events::event::alb::AlbTargetGroupResponse;
use aws_lambda_events::event::apigw::ApiGatewayProxyResponse;
#[cfg(feature = "apigw_http")]
use aws_lambda_events::event::apigw::ApiGatewayV2httpResponse;
use encoding_rs::Encoding;
use http::header::CONTENT_ENCODING;
use http::HeaderMap;
use http::{
Expand All @@ -16,9 +17,10 @@ use http::{
};
use http_body::Body as HttpBody;
use hyper::body::to_bytes;
use mime::{Mime, CHARSET};
use serde::Serialize;
use std::borrow::Cow;
use std::future::ready;
use std::str::from_utf8;
use std::{fmt, future::Future, pin::Pin};

/// Representation of Lambda response
Expand Down Expand Up @@ -183,15 +185,15 @@ where
value.to_str().unwrap_or_default()
} else {
// Content-Type and Content-Encoding not set, passthrough as utf8 text
return convert_to_text(self);
return convert_to_text(self, "utf-8".to_string());
};

if content_type.starts_with("text")
|| content_type.starts_with("application/json")
|| content_type.starts_with("application/javascript")
|| content_type.starts_with("application/xml")
{
return convert_to_text(self);
return convert_to_text(self, content_type.to_string());
}

convert_to_binary(self)
Expand All @@ -206,15 +208,30 @@ where
Box::pin(async move { Body::from(to_bytes(body).await.expect("unable to read bytes from body").to_vec()) })
}

fn convert_to_text<B>(body: B) -> BodyFuture
fn convert_to_text<B>(body: B, content_type: String) -> BodyFuture
where
B: HttpBody + Unpin + 'static,
B::Error: fmt::Debug,
{
let mime_type = content_type.parse::<Mime>();

let encoding = match mime_type.as_ref() {
Ok(mime) => mime.get_param(CHARSET).unwrap_or(mime::UTF_8),
Err(_) => mime::UTF_8,
};

let label = encoding.as_ref().as_bytes();
let encoding = Encoding::for_label(label).unwrap_or(encoding_rs::UTF_8);

// assumes utf-8
Copy link
Contributor Author

@hugobast hugobast Jul 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've pushed changes that should be close, at least in spirit, to what was talked about in the last few days in the other thread, I'm not that comfortable with Rust so please make suggestions if the code here is suboptimal.

I have concerns about encoding @david-perez @bnusunny @calavera. I'm guessing that most things will be utf-8 (JSON?) but that's far from saying that everything is utf-8.

@bnusunny commented with an example code that's using reqwest so I looked at how the text fn is implemented on reqwest::Response. We could use the same approach here and parse the charset from the content-type if we'd like I'll defer to your comments/opinions on this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great work @hugobast ! If there is a path to check the charset, it might be worth doing it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's optional but here's a commit where encode based on the charset of the content-type, falling back to utf-8 in error cases: 2632aad

Box::pin(async move {
let bytes = to_bytes(body).await.expect("unable to read bytes from body");
Body::from(from_utf8(&bytes).expect("response body not utf-8"))
let (content, _, _) = encoding.decode(&bytes);

match content {
Cow::Borrowed(content) => Body::from(content),
Cow::Owned(content) => Body::from(content),
}
})
}

Expand Down Expand Up @@ -300,6 +317,24 @@ mod tests {
)
}

#[tokio::test]
async fn charset_content_type_header() {
// Drive the implementation by using `hyper::Body` instead of
// of `aws_lambda_events::encodings::Body`
let response = Response::builder()
.header(CONTENT_TYPE, "application/json; charset=utf-16")
.body(HyperBody::from("000000".as_bytes()))
.expect("unable to build http::Response");
let response = response.into_response().await;
let response = LambdaResponse::from_response(&RequestOrigin::ApiGatewayV2, response);

let json = serde_json::to_string(&response).expect("failed to serialize to json");
assert_eq!(
json,
r#"{"statusCode":200,"headers":{"content-type":"application/json; charset=utf-16"},"multiValueHeaders":{"content-type":["application/json; charset=utf-16"]},"body":"〰〰〰","isBase64Encoded":false,"cookies":[]}"#
)
}

#[tokio::test]
async fn content_headers_unset() {
// Drive the implementation by using `hyper::Body` instead of
Expand Down