{"metadata":{"title":"multi-class-classification","kernelspec":{"display_name":"Redshift","language":"postgresql","name":"Redshift"},"language_info":{"file_extension":".sql","name":"Redshift"},"version":1},"nbformat":4,"nbformat_minor":0,"cells":[{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["prepare data"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["CREATE TABLE IF NOT EXISTS ecommerce_sales\n","(\n","\tinvoiceno VARCHAR(30)   \n","\t,stockcode VARCHAR(30)   \n","\t,description VARCHAR(60)    \n","\t,quantity DOUBLE PRECISION   \n","\t,invoicedate VARCHAR(30)    \n","\t,unitprice    DOUBLE PRECISION\n","\t,customerid BIGINT    \n","\t,country VARCHAR(25)    \n",")\n",";\n","Copy ecommerce_sales\n","From 's3://redshift-ml-multiclass/ecommerce_data.txt'\n","iam_role default delimiter '\\t' IGNOREHEADER 1 region 'us-east-1' maxerror 100;\n","\n","create table ecommerce_sales_data as (\n","  select\n","    t1.stockcode,\n","    t1.description,\n","    t1.invoicedate,\n","    t1.customerid,\n","    t1.country,\n","    t1.sales_amt,\n","    cast(random() * 100 as int) as data_group_id\n","  from\n","    (\n","      select\n","        stockcode,\n","        description,\n","        invoicedate,\n","        customerid,\n","        country,\n","        sum(quantity * unitprice) as sales_amt\n","      from\n","        ecommerce_sales\n","      group by\n","        1,\n","        2,\n","        3,\n","        4,\n","        5\n","    ) t1\n",");\n"," \n","create table ecommerce_sales_training as (\n","  select\n","    a.customerid,\n","    a.country,\n","    a.stockcode,\n","    a.description,\n","    a.invoicedate,\n","    a.sales_amt,\n","    (b.nbr_months_active) as nbr_months_active\n","  from\n","    ecommerce_sales_data a\n","    inner join (\n","      select\n","        customerid,\n","        count(\n","          distinct(\n","            DATE_PART(y, cast(invoicedate as date)) || '-' || LPAD(\n","              DATE_PART(mon, cast(invoicedate as date)),\n","              2,\n","              '00'\n","            )\n","          )\n","        ) as nbr_months_active\n","      from\n","        ecommerce_sales_data\n","      group by\n","        1\n","    ) b on a.customerid = b.customerid\n","  where\n","    a.data_group_id < 80\n",");\n"," \n","create table ecommerce_sales_validation as (\n","  select\n","    a.customerid,\n","    a.country,\n","    a.stockcode,\n","    a.description,\n","    a.invoicedate,\n","    a.sales_amt,\n","    (b.nbr_months_active) as nbr_months_active\n","  from\n","    ecommerce_sales_data a\n","    inner join (\n","      select\n","        customerid,\n","        count(\n","          distinct(\n","            DATE_PART(y, cast(invoicedate as date)) || '-' || LPAD(\n","              DATE_PART(mon, cast(invoicedate as date)),\n","              2,\n","              '00'\n","            )\n","          )\n","        ) as nbr_months_active\n","      from\n","        ecommerce_sales_data\n","      group by\n","        1\n","    ) b on a.customerid = b.customerid\n","  where\n","    a.data_group_id between 80\n","    and 90\n",");\n"," \n","create table ecommerce_sales_prediction as (\n","  select\n","    customerid,\n","    country,\n","    stockcode,\n","    description,\n","    invoicedate,\n","    sales_amt\n","  from\n","    ecommerce_sales_data\n","  where\n","    data_group_id > 90);"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["create model - replace <<your-amazon-s3-bucket>> with your S3 bucket"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["create model ecommerce_customer_activity\n","from\n","  (\n","select   \n","  customerid,\n","  country,\n","  stockcode,\n","  description,\n","  invoicedate,\n","  sales_amt,\n","  nbr_months_active  \n"," from ecommerce_sales_training)\n"," TARGET nbr_months_active FUNCTION predict_customer_activity\n"," IAM_ROLE default\n"," problem_type MULTICLASS_CLASSIFICATION  \n","  SETTINGS (\n","    S3_BUCKET '<<your-amazon-s3-bucket>>',\n","    S3_GARBAGE_COLLECT OFF,\n","    MAX_RUNTIME 9600\n","    \n","  );"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["show model output"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["Show model ecommerce_customer_activity;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["run validation query"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["select \n"," cast(sum(t1.match)as decimal(7,2)) as predicted_matches\n",",cast(sum(t1.nonmatch) as decimal(7,2)) as predicted_non_matches\n",",cast(sum(t1.match + t1.nonmatch) as decimal(7,2))  as total_predictions\n",",predicted_matches / total_predictions as pct_accuracy\n","from \n","(select   \n","  customerid,\n","  country,\n","  stockcode,\n","  description,\n","  invoicedate,\n","  sales_amt,\n","  nbr_months_active,\n","  predict_customer_activity(customerid, country, stockcode, description, invoicedate, sales_amt) as predicted_months_active,\n","  case when nbr_months_active = predicted_months_active then 1\n","      else 0 end as match,\n","  case when nbr_months_active <> predicted_months_active then 1\n","    else 0 end as nonmatch\n","  from ecommerce_sales_validation\n","  )t1;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["run inference query"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["select \n","  customerid,  \n","  predict_customer_activity(customerid, country, stockcode, description, invoicedate, sales_amt) as predicted_months_active\n","  from ecommerce_sales_prediction\n"," where predicted_months_active >=7\n"," group by 1,2\n"," limit 10;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["run probabality queries"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["select \n","customerid,\n","predict_customer_activity_prob(customerid, country, stockcode, description, invoicedate, sales_amt) as probabilities\n"," from ecommerce_sales_prediction\n","where customerid in (13993, 17581)\n","group by 1,2;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["Select t1.customerid, prediction.labels[0], prediction.probabilities[0]\n","from (select \n","  customerid,\n","  predict_customer_activity_prob(customerid, country, stockcode, description, invoicedate, sales_amt) as prediction\n","  from ecommerce_sales_prediction\n"," where customerid in(13993, 17581)\n",")t1\n","group by 1,2,3\n","order by 1;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["Select t1.customerid, prediction.labels[0] as labels, cast(prediction.probabilities[0] as decimal(4,2)) as probabilities\n","from (select \n","  customerid,\n","  predict_customer_activity_prob(customerid, country, stockcode, description, invoicedate, sales_amt) as prediction\n","  from ecommerce_sales_prediction\n"," where customerid in(13993, 17581)\n",")t1\n","group by 1,2,3\n","order by 1;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["Select cast(prediction.probabilities[0] as decimal(4,1)) as probabilities, count(*) \n","from (select \n","  customerid,\n","  predict_customer_activity_prob(customerid, country, stockcode, description, invoicedate, sales_amt) as prediction\n","  from ecommerce_sales_prediction\n",")t1\n","group by 1\n","order by 1 desc;"],"cell_type":"code","execution_count":0,"outputs":[]},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["run explainability report"],"cell_type":"markdown"},{"metadata":{"displayMode":"maximized","width":12,"isLimitOn":true},"source":["select json_table.report.explanations.kernel_shap.label0.global_shap_values \n","from (select explain_model('ecommerce_customer_activity') as report) as json_table;"],"cell_type":"code","execution_count":0,"outputs":[]}]}