This repository has been archived on 2025-09-14. You can view files and clone it, but cannot push or open issues or pull requests.
Files

854 lines
57 KiB
C++

/**
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0.
*/
#pragma once
#include <aws/machinelearning/MachineLearning_EXPORTS.h>
#include <aws/core/utils/memory/stl/AWSString.h>
#include <utility>
namespace Aws
{
namespace Utils
{
namespace Json
{
// Forward declarations only: these JSON types are named in the S3DataSpec
// constructor, assignment operator and Jsonize() declarations further down.
class JsonValue;
class JsonView;
} // namespace Json
} // namespace Utils
namespace MachineLearning
{
namespace Model
{
/**
* <p> Describes the data specification of a <code>DataSource</code>.</p><p><h3>See
* Also:</h3> <a
* href="http://docs.aws.amazon.com/goto/WebAPI/machinelearning-2014-12-12/S3DataSpec">AWS
* API Reference</a></p>
*/
class AWS_MACHINELEARNING_API S3DataSpec
{
public:
/**
* Default constructor. Defined outside this header.
*/
S3DataSpec();
/**
* Constructs an S3DataSpec from a JSON view. Defined outside this header;
* presumably reads the members from the matching JSON keys - confirm against
* the definition.
*/
S3DataSpec(Aws::Utils::Json::JsonView jsonValue);
/**
* Assigns from a JSON view and returns a reference to an S3DataSpec
* (presumably <code>*this</code> - confirm against the definition).
*/
S3DataSpec& operator=(Aws::Utils::Json::JsonView jsonValue);
/**
* Returns an <code>Aws::Utils::Json::JsonValue</code> built from this object.
* Declared as a const member function.
*/
Aws::Utils::Json::JsonValue Jsonize() const;
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline const Aws::String& GetDataLocationS3() const
{
  // Hands back a const reference to the stored location.
  return m_dataLocationS3;
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline bool DataLocationS3HasBeenSet() const
{
  // The flag is raised by each SetDataLocationS3 overload.
  return m_dataLocationS3HasBeenSet;
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline void SetDataLocationS3(const Aws::String& value)
{
  // Mark the field as present first, then copy the caller's string.
  m_dataLocationS3HasBeenSet = true;
  m_dataLocationS3 = value;
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline void SetDataLocationS3(Aws::String&& value)
{
  // Mark the field as present first, then move from the caller's string.
  m_dataLocationS3HasBeenSet = true;
  m_dataLocationS3 = std::move(value);
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline void SetDataLocationS3(const char* value)
{
  // Mark the field as present first, then copy the C string.
  // NOTE(review): value goes straight to assign(); presumably it must be a
  // non-null, NUL-terminated string - confirm against Aws::String.
  m_dataLocationS3HasBeenSet = true;
  m_dataLocationS3.assign(value);
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline S3DataSpec& WithDataLocationS3(const Aws::String& value)
{
  // Delegate to the setter, then return this object so calls can be chained.
  SetDataLocationS3(value);
  return *this;
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline S3DataSpec& WithDataLocationS3(Aws::String&& value)
{
  // Forward the rvalue to the setter, then return this object for chaining.
  SetDataLocationS3(std::move(value));
  return *this;
}
/**
* <p>The location of the data file(s) used by a <code>DataSource</code>. The URI
* specifies a data file or an Amazon Simple Storage Service (Amazon S3) directory
* or bucket containing data files.</p>
*/
inline S3DataSpec& WithDataLocationS3(const char* value)
{
  // Delegate to the C-string setter, then return this object for chaining.
  SetDataLocationS3(value);
  return *this;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline const Aws::String& GetDataRearrangement() const
{
  // Hands back a const reference to the stored rearrangement string.
  return m_dataRearrangement;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline bool DataRearrangementHasBeenSet() const
{
  // The flag is raised by each SetDataRearrangement overload.
  return m_dataRearrangementHasBeenSet;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline void SetDataRearrangement(const Aws::String& value)
{
  // Mark the field as present first, then copy the caller's string.
  m_dataRearrangementHasBeenSet = true;
  m_dataRearrangement = value;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline void SetDataRearrangement(Aws::String&& value)
{
  // Mark the field as present first, then move from the caller's string.
  m_dataRearrangementHasBeenSet = true;
  m_dataRearrangement = std::move(value);
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline void SetDataRearrangement(const char* value)
{
  // Mark the field as present first, then copy the C string.
  // NOTE(review): value goes straight to assign(); presumably it must be a
  // non-null, NUL-terminated string - confirm against Aws::String.
  m_dataRearrangementHasBeenSet = true;
  m_dataRearrangement.assign(value);
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline S3DataSpec& WithDataRearrangement(const Aws::String& value)
{
  // Delegate to the setter, then return this object so calls can be chained.
  SetDataRearrangement(value);
  return *this;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline S3DataSpec& WithDataRearrangement(Aws::String&& value)
{
    // Hand the rvalue on to the setter, then return this object for chaining.
    SetDataRearrangement(std::move(value));
    return *this;
}
/**
* <p>A JSON string that represents the splitting and rearrangement processing to
* be applied to a <code>DataSource</code>. If the <code>DataRearrangement</code>
* parameter is not provided, all of the input data is used to create the
* <code>Datasource</code>.</p> <p>There are multiple parameters that control what
* data is used to create a datasource:</p> <ul>
* <li><p><b><code>percentBegin</code></b></p> <p>Use <code>percentBegin</code> to
* indicate the beginning of the range of the data used to create the Datasource.
* If you do not include <code>percentBegin</code> and <code>percentEnd</code>,
* Amazon ML includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>percentEnd</code></b></p> <p>Use <code>percentEnd</code> to
* indicate the end of the range of the data used to create the Datasource. If you
* do not include <code>percentBegin</code> and <code>percentEnd</code>, Amazon ML
* includes all of the data when creating the datasource.</p></li>
* <li><p><b><code>complement</code></b></p> <p>The <code>complement</code>
* parameter instructs Amazon ML to use the data that is not included in the range
* of <code>percentBegin</code> to <code>percentEnd</code> to create a datasource.
* The <code>complement</code> parameter is useful if you need to create
* complementary datasources for training and evaluation. To create a complementary
* datasource, use the same values for <code>percentBegin</code> and
* <code>percentEnd</code>, along with the <code>complement</code> parameter.</p>
* <p>For example, the following two datasources do not share any data, and can be
* used to train and evaluate a model. The first datasource has 25 percent of the
* data, and the second one has 75 percent of the data.</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":0, "percentEnd":25}}</code></p>
* <p>Datasource for training: <code>{"splitting":{"percentBegin":0,
* "percentEnd":25, "complement":"true"}}</code></p> </li>
* <li><p><b><code>strategy</code></b></p> <p>To change how Amazon ML splits the
* data for a datasource, use the <code>strategy</code> parameter.</p> <p>The
* default value for the <code>strategy</code> parameter is
* <code>sequential</code>, meaning that Amazon ML takes all of the data records
* between the <code>percentBegin</code> and <code>percentEnd</code> parameters for
* the datasource, in the order that the records appear in the input data.</p>
* <p>The following two <code>DataRearrangement</code> lines are examples of
* sequentially ordered training and evaluation datasources:</p> <p>Datasource for
* evaluation: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential"}}</code></p> <p>Datasource for training:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"sequential", "complement":"true"}}</code></p> <p>To randomly split
* the input data into the proportions indicated by the percentBegin and percentEnd
* parameters, set the <code>strategy</code> parameter to <code>random</code> and
* provide a string that is used as the seed value for the random data splitting
* (for example, you can use the S3 path to your data as the random seed string).
* If you choose the random split strategy, Amazon ML assigns each row of data a
* pseudo-random number between 0 and 100, and then selects the rows that have an
* assigned number between <code>percentBegin</code> and <code>percentEnd</code>.
* Pseudo-random numbers are assigned using both the input seed string value and
* the byte offset as a seed, so changing the data results in a different split.
* Any existing ordering is preserved. The random splitting strategy ensures that
* variables in the training and evaluation data are distributed similarly. It is
* useful in the cases where the input data may have an implicit sort order, which
* would otherwise result in training and evaluation datasources containing
* non-similar data records.</p> <p>The following two
* <code>DataRearrangement</code> lines are examples of non-sequentially ordered
* training and evaluation datasources:</p> <p>Datasource for evaluation:
* <code>{"splitting":{"percentBegin":70, "percentEnd":100, "strategy":"random",
* "randomSeed":"s3://my_s3_path/bucket/file.csv"}}</code></p> <p>Datasource for
* training: <code>{"splitting":{"percentBegin":70, "percentEnd":100,
* "strategy":"random", "randomSeed":"s3://my_s3_path/bucket/file.csv",
* "complement":"true"}}</code></p> </li> </ul>
*/
inline S3DataSpec& WithDataRearrangement(const char* value)
{
    // Forward the C string to the setter, then return this object for chaining.
    SetDataRearrangement(value);
    return *this;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline const Aws::String& GetDataSchema() const
{
    // Read-only reference to the stored schema string; no copy is made.
    return m_dataSchema;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline bool DataSchemaHasBeenSet() const
{
    // The flag is raised by every SetDataSchema overload.
    return m_dataSchemaHasBeenSet;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline void SetDataSchema(const Aws::String& value)
{
    // Mark the field as set first, then copy the caller's string.
    m_dataSchemaHasBeenSet = true;
    m_dataSchema = value;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline void SetDataSchema(Aws::String&& value)
{
    // Mark the field as set first, then move from the caller's string.
    m_dataSchemaHasBeenSet = true;
    m_dataSchema = std::move(value);
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline void SetDataSchema(const char* value)
{
    // Mark the field as set first, then copy the characters from the C string.
    m_dataSchemaHasBeenSet = true;
    m_dataSchema.assign(value);
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline S3DataSpec& WithDataSchema(const Aws::String& value)
{
    // Store via the copying setter, then return this object for chaining.
    SetDataSchema(value);
    return *this;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline S3DataSpec& WithDataSchema(Aws::String&& value)
{
    // Store via the moving setter, then return this object for chaining.
    SetDataSchema(std::move(value));
    return *this;
}
/**
* <p> A JSON string that represents the schema for an Amazon S3
* <code>DataSource</code>. The <code>DataSchema</code> defines the structure of
* the observation data in the data file(s) referenced in the
* <code>DataSource</code>.</p> <p>You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p> <p>Define
* your <code>DataSchema</code> as a series of key-value pairs.
* <code>attributes</code> and <code>excludedVariableNames</code> have an array of
* key-value pairs for their value. Use the following format to define your
* <code>DataSchema</code>.</p> <p>{ "version": "1.0",</p> <p>
* "recordAnnotationFieldName": "F1",</p> <p> "recordWeightFieldName": "F2",</p>
* <p> "targetFieldName": "F3",</p> <p> "dataFormat": "CSV",</p> <p>
* "dataFileContainsHeader": true,</p> <p> "attributes": [</p> <p> { "fieldName":
* "F1", "fieldType": "TEXT" }, { "fieldName": "F2", "fieldType": "NUMERIC" }, {
* "fieldName": "F3", "fieldType": "CATEGORICAL" }, { "fieldName": "F4",
* "fieldType": "NUMERIC" }, { "fieldName": "F5", "fieldType": "CATEGORICAL" }, {
* "fieldName": "F6", "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
* "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
* "WEIGHTED_STRING_SEQUENCE" } ],</p> <p> "excludedVariableNames": [ "F6" ] } </p>
*
*/
inline S3DataSpec& WithDataSchema(const char* value)
{
    // Store via the C-string setter, then return this object for chaining.
    SetDataSchema(value);
    return *this;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline const Aws::String& GetDataSchemaLocationS3() const
{
    // Read-only reference to the stored schema location; no copy is made.
    return m_dataSchemaLocationS3;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline bool DataSchemaLocationS3HasBeenSet() const
{
    // The flag is raised by every SetDataSchemaLocationS3 overload.
    return m_dataSchemaLocationS3HasBeenSet;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline void SetDataSchemaLocationS3(const Aws::String& value)
{
    // Mark the field as set first, then copy the caller's string.
    m_dataSchemaLocationS3HasBeenSet = true;
    m_dataSchemaLocationS3 = value;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline void SetDataSchemaLocationS3(Aws::String&& value)
{
    // Mark the field as set first, then move from the caller's string.
    m_dataSchemaLocationS3HasBeenSet = true;
    m_dataSchemaLocationS3 = std::move(value);
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline void SetDataSchemaLocationS3(const char* value)
{
    // Mark the field as set first, then copy the characters from the C string.
    m_dataSchemaLocationS3HasBeenSet = true;
    m_dataSchemaLocationS3.assign(value);
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline S3DataSpec& WithDataSchemaLocationS3(const Aws::String& value)
{
    // Store via the copying setter, then return this object for chaining.
    SetDataSchemaLocationS3(value);
    return *this;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline S3DataSpec& WithDataSchemaLocationS3(Aws::String&& value)
{
    // Store via the moving setter, then return this object for chaining.
    SetDataSchemaLocationS3(std::move(value));
    return *this;
}
/**
* <p>Describes the schema location in Amazon S3. You must provide either the
* <code>DataSchema</code> or the <code>DataSchemaLocationS3</code>.</p>
*/
inline S3DataSpec& WithDataSchemaLocationS3(const char* value)
{
    // Store via the C-string setter, then return this object for chaining.
    SetDataSchemaLocationS3(value);
    return *this;
}
private:
// Backing storage for the model's fields. Each string is paired with a
// "HasBeenSet" flag that the corresponding setters switch to true.

// Data location string returned by GetDataLocationS3().
Aws::String m_dataLocationS3;
// Set to true by SetDataLocationS3(); reported by DataLocationS3HasBeenSet().
bool m_dataLocationS3HasBeenSet;
// Rearrangement string; presumably written by SetDataRearrangement(), whose
// definition is elsewhere in this class.
Aws::String m_dataRearrangement;
// Presumably tracks whether m_dataRearrangement was assigned, mirroring the other flags.
bool m_dataRearrangementHasBeenSet;
// Schema string written by SetDataSchema() and returned by GetDataSchema().
Aws::String m_dataSchema;
// Set to true by SetDataSchema(); reported by DataSchemaHasBeenSet().
bool m_dataSchemaHasBeenSet;
// Schema location string written by SetDataSchemaLocationS3() and returned by
// GetDataSchemaLocationS3().
Aws::String m_dataSchemaLocationS3;
// Set to true by SetDataSchemaLocationS3(); reported by DataSchemaLocationS3HasBeenSet().
bool m_dataSchemaLocationS3HasBeenSet;
};
} // namespace Model
} // namespace MachineLearning
} // namespace Aws